diff --git "a/app.py" "b/app.py" new file mode 100644--- /dev/null +++ "b/app.py" @@ -0,0 +1,5191 @@ +""" +Quran Aligner — Automatic Quran recitation segmentation and alignment. +""" +import os +import sys +import json +import time +import unicodedata +from pathlib import Path + +# Add paths for imports BEFORE importing anything else +_app_path = Path(__file__).parent.resolve() +sys.path.insert(0, str(_app_path)) + +# Build Cython extensions in-place (falls back to pure Python if it fails) +import subprocess +subprocess.run( + [sys.executable, str(_app_path / "setup.py"), "build_ext", "--inplace"], + cwd=str(_app_path), + capture_output=True, +) + +import gradio as gr +import numpy as np +import librosa + +from config import ( + PORT, IS_HF_SPACE, + get_vad_duration, get_asr_duration, + MIN_SILENCE_MIN, MIN_SILENCE_MAX, MIN_SILENCE_STEP, + MIN_SPEECH_MIN, MIN_SPEECH_MAX, MIN_SPEECH_STEP, + PAD_MIN, PAD_MAX, PAD_STEP, + PRESET_MUJAWWAD, PRESET_MURATTAL, PRESET_FAST, + CONFIDENCE_HIGH, CONFIDENCE_MED, REVIEW_SUMMARY_MAX_SEGMENTS, + UNDERSEG_MIN_WORDS, UNDERSEG_MIN_AYAH_SPAN, UNDERSEG_MIN_DURATION, + QURAN_TEXT_SIZE_PX, ARABIC_WORD_SPACING, + SURAH_INFO_PATH, + PHONEME_ALIGNMENT_PROFILING, + RESAMPLE_TYPE, + SEGMENT_AUDIO_DIR, DELETE_CACHE_FREQUENCY, DELETE_CACHE_AGE, AUDIO_PRELOAD_COUNT, + ANIM_WORD_COLOR, ANIM_STYLE_ROW_SCALES, + ANIM_DISPLAY_MODES, ANIM_DISPLAY_MODE_DEFAULT, ANIM_OPACITY_PREV_DEFAULT, ANIM_OPACITY_AFTER_DEFAULT, ANIM_OPACITY_STEP, ANIM_PRESETS, + ANIM_GRANULARITIES, ANIM_GRANULARITY_DEFAULT, + ANIM_WINDOW_PREV_DEFAULT, ANIM_WINDOW_AFTER_DEFAULT, + ANIM_WINDOW_PREV_MIN, ANIM_WINDOW_PREV_MAX, + ANIM_WINDOW_AFTER_MIN, ANIM_WINDOW_AFTER_MAX, + MEGA_WORD_SPACING_MIN, MEGA_WORD_SPACING_MAX, MEGA_WORD_SPACING_STEP, MEGA_WORD_SPACING_DEFAULT, + MEGA_TEXT_SIZE_MIN, MEGA_TEXT_SIZE_MAX, MEGA_TEXT_SIZE_STEP, MEGA_TEXT_SIZE_DEFAULT, + MEGA_LINE_SPACING_MIN, MEGA_LINE_SPACING_MAX, MEGA_LINE_SPACING_STEP, MEGA_LINE_SPACING_DEFAULT, + MEGA_SURAH_LIGATURE_SIZE, + 
PROGRESS_PROCESS_AUDIO, PROGRESS_RESEGMENT, PROGRESS_RETRANSCRIBE, + MFA_SPACE_URL, MFA_TIMEOUT, MFA_PROGRESS_SEGMENT_RATE, + LEFT_COLUMN_SCALE, RIGHT_COLUMN_SCALE, +) +from src.zero_gpu import gpu_with_fallback, ZERO_GPU_AVAILABLE, is_quota_exhausted, is_user_forced_cpu, get_quota_reset_time +from src.segment_processor import ( + load_segmenter, + ensure_models_on_gpu, + detect_speech_segments, + run_phoneme_matching, + test_vad_aoti_export, + apply_aoti_compiled, + VadSegment, SegmentInfo, ProfilingData +) +from config import ANCHOR_SEGMENTS +from data.font_data import DIGITAL_KHATT_FONT_B64, SURAH_NAME_FONT_B64 + +# Load surah name ligature map +with open(Path(__file__).parent / "data" / "ligatures.json") as _f: + _SURAH_LIGATURES = json.load(_f) + + +# ============================================================================= +# GPU-decorated processing functions +# ============================================================================= + +def _combined_duration(audio, sample_rate, *_args, **_kwargs): + """Lease duration for VAD+ASR: sum of independent estimates.""" + minutes = len(audio) / sample_rate / 60 + model_name = _args[3] if len(_args) > 3 else _kwargs.get("model_name", "Base") + return get_vad_duration(minutes) + get_asr_duration(minutes, model_name) + +def _asr_only_duration(segment_audios, sample_rate, *_args, **_kwargs): + """Lease duration for standalone ASR.""" + minutes = sum(len(s) for s in segment_audios) / sample_rate / 60 + model_name = _args[0] if _args else _kwargs.get("model_name", "Base") + return get_asr_duration(minutes, model_name) + + +def _run_asr_core(segment_audios, sample_rate, model_name="Base"): + """Core ASR logic: load, move to GPU, transcribe. 
No GPU decorator.""" + from src.alignment.phoneme_asr import load_phoneme_asr, transcribe_batch + + t_gpu_start = time.time() + load_phoneme_asr(model_name) + t_move = time.time() + ensure_models_on_gpu(asr_model_name=model_name) + gpu_move_time = time.time() - t_move + print(f"[PHONEME ASR] GPU move: {gpu_move_time:.3f}s") + results, batch_profiling, sorting_time, batch_build_time = transcribe_batch(segment_audios, sample_rate, model_name) + gpu_time = time.time() - t_gpu_start + return results, batch_profiling, sorting_time, batch_build_time, gpu_move_time, gpu_time + + +@gpu_with_fallback(duration=_combined_duration) +def run_vad_and_asr_gpu(audio, sample_rate, min_silence_ms, min_speech_ms, pad_ms, model_name="Base"): + """Single GPU lease: VAD segmentation + Phoneme ASR.""" + t_gpu_start = time.time() + + # --- VAD phase --- + load_segmenter() + vad_move_time = ensure_models_on_gpu() + intervals, vad_profiling, raw_speech_intervals, raw_is_complete = detect_speech_segments(audio, sample_rate, min_silence_ms, min_speech_ms, pad_ms) + vad_profiling["model_move_time"] = vad_move_time + vad_gpu_time = time.time() - t_gpu_start + + if not intervals: + return (intervals, vad_profiling, vad_gpu_time, raw_speech_intervals, raw_is_complete, + None, None, None, None, 0.0, 0.0) + + # --- ASR phase --- + segment_audios = [audio[int(s * sample_rate):int(e * sample_rate)] for s, e in intervals] + asr_results = _run_asr_core(segment_audios, sample_rate, model_name) + + return (intervals, vad_profiling, vad_gpu_time, raw_speech_intervals, raw_is_complete, *asr_results) + + +@gpu_with_fallback(duration=_asr_only_duration) +def run_phoneme_asr_gpu(segment_audios, sample_rate, model_name="Base"): + """Standalone ASR GPU lease (used by resegment/retranscribe paths).""" + return _run_asr_core(segment_audios, sample_rate, model_name) + + +@gpu_with_fallback(duration=lambda: 300) # 5 min lease for compilation test +def test_aoti_compilation_gpu(): + """ + Test AoT compilation for 
VAD model on GPU. + Called at startup to verify torch.export works. + """ + load_segmenter() + ensure_models_on_gpu() + return test_vad_aoti_export() + + +# ============================================================================= +# Segment rendering +# ============================================================================= + +def format_timestamp(seconds: float) -> str: + """Format seconds as MM:SS.ms""" + minutes = int(seconds // 60) + secs = seconds % 60 + return f"{minutes}:{secs:04.1f}" + + +def get_confidence_class(score: float) -> str: + """Get CSS class based on confidence score.""" + if score >= CONFIDENCE_HIGH: + return "segment-high" + elif score >= CONFIDENCE_MED: + return "segment-med" + else: + return "segment-low" + + +def get_segment_word_stats(matched_ref: str) -> tuple[int, int]: + """Return (word_count, ayah_span) for a matched ref. (0, 1) if unparseable.""" + if not matched_ref or "-" not in matched_ref: + return 0, 1 + try: + start_ref, end_ref = matched_ref.split("-", 1) + start_parts = start_ref.split(":") + end_parts = end_ref.split(":") + if len(start_parts) < 3 or len(end_parts) < 3: + return 0, 1 + + # Ayah span + start_ayah = (int(start_parts[0]), int(start_parts[1])) + end_ayah = (int(end_parts[0]), int(end_parts[1])) + ayah_span = 1 + if start_ayah != end_ayah: + ayah_span = abs(end_ayah[1] - start_ayah[1]) + 1 if start_ayah[0] == end_ayah[0] else 2 + + # Word count via index + word_count = 0 + from src.quran_index import get_quran_index + index = get_quran_index() + indices = index.ref_to_indices(matched_ref) + if indices: + word_count = indices[1] - indices[0] + 1 + + return word_count, ayah_span + except Exception: + return 0, 1 + + +def check_undersegmented(matched_ref: str, duration: float) -> bool: + """Check if a segment is potentially undersegmented. + + Criteria: (word_count >= threshold OR ayah_span >= threshold) AND duration >= threshold. 
+ """ + if duration < UNDERSEG_MIN_DURATION: + return False + word_count, ayah_span = get_segment_word_stats(matched_ref) + return word_count >= UNDERSEG_MIN_WORDS or ayah_span >= UNDERSEG_MIN_AYAH_SPAN + + +# Arabic-Indic digits for verse markers +ARABIC_DIGITS = { + '0': '٠', '1': '١', '2': '٢', '3': '٣', '4': '٤', + '5': '٥', '6': '٦', '7': '٧', '8': '٨', '9': '٩', +} + +def to_arabic_numeral(number: int) -> str: + """Convert an integer to Arabic-Indic numerals.""" + return ''.join(ARABIC_DIGITS[d] for d in str(number)) + + +def format_verse_marker(verse_num: int) -> str: + """ + Format a verse number as an Arabic verse marker. + Uses U+06DD (Arabic End of Ayah) which renders as a decorated marker + in DigitalKhatt (combines U+06DD + digit into a single glyph). + """ + numeral = to_arabic_numeral(verse_num) + end_of_ayah = '\u06DD' + return f'{end_of_ayah}{numeral}' + + +# Cached verse word counts from surah_info.json +_verse_word_counts_cache: dict[int, dict[int, int]] | None = None + + +def _load_verse_word_counts() -> dict[int, dict[int, int]]: + """Load and cache verse word counts from surah_info.json.""" + global _verse_word_counts_cache + if _verse_word_counts_cache is not None: + return _verse_word_counts_cache + + with open(SURAH_INFO_PATH, 'r', encoding='utf-8') as f: + surah_info = json.load(f) + + _verse_word_counts_cache = {} + for surah_num, data in surah_info.items(): + surah_int = int(surah_num) + _verse_word_counts_cache[surah_int] = {} + for verse_data in data.get('verses', []): + verse_num = verse_data.get('verse') + num_words = verse_data.get('num_words', 0) + if verse_num: + _verse_word_counts_cache[surah_int][verse_num] = num_words + + return _verse_word_counts_cache + + +def split_into_char_groups(text): + """Split text into groups of base character + following combining marks. + + Each group is one visible "letter" — a base character followed by any + diacritics (tashkeel) or other combining marks attached to it. 
+ """ + groups = [] + current = "" + for ch in text: + if unicodedata.category(ch).startswith('M') and ch != '\u0670': + current += ch + else: + if current: + groups.append(current) + current = ch + if current: + groups.append(current) + return groups + + +ZWSP = '\u200b' +DAGGER_ALEF = '\u0670' + +def _wrap_word_with_chars(word_text, pos=None): + """Wrap a word in with nested per letter group.""" + # Strip tatweel (U+0640) — MFA doesn't output it, so keeping it causes + # index misalignment during timestamp injection + word_text = word_text.replace('\u0640', '') + # Insert ZWSP before dagger alef so it can be highlighted independently + spans = [] + for g in split_into_char_groups(word_text): + if g.startswith(DAGGER_ALEF): + spans.append(f'{ZWSP}{g}') + else: + spans.append(f'{g}') + char_spans = "".join(spans) + pos_attr = f' data-pos="{pos}"' if pos else '' + return f'{char_spans}' + + +def get_text_with_markers(matched_ref: str) -> str | None: + """ + Generate matched text with verse markers inserted at verse boundaries. + + Uses position-based detection: iterates words and inserts an HTML marker + after the last word of each verse (matching recitation_app approach). 
+ + Args: + matched_ref: Reference like "2:255:1-2:255:5" + + Returns: + Text with verse markers, or None if ref is invalid + """ + if not matched_ref: + return None + + from src.quran_index import get_quran_index + index = get_quran_index() + + indices = index.ref_to_indices(matched_ref) + if not indices: + return None + + start_idx, end_idx = indices + verse_word_counts = _load_verse_word_counts() + + parts = [] + for w in index.words[start_idx:end_idx + 1]: + parts.append(_wrap_word_with_chars(w.display_text, pos=f"{w.surah}:{w.ayah}:{w.word}")) + # Check if this is the last word of its verse + num_words = verse_word_counts.get(w.surah, {}).get(w.ayah, 0) + if num_words > 0 and w.word == num_words: + parts.append(format_verse_marker(w.ayah)) + + return " ".join(parts) + + +def simplify_ref(ref: str) -> str: + """Simplify a matched_ref like '84:9:1-84:9:4' to '84:9:1-4' when same verse.""" + if not ref or "-" not in ref: + return ref + parts = ref.split("-") + if len(parts) != 2: + return ref + start, end = parts + start_parts = start.split(":") + end_parts = end.split(":") + if len(start_parts) == 3 and len(end_parts) == 3: + if start_parts[0] == end_parts[0] and start_parts[1] == end_parts[1]: + return f"{start}-{end_parts[2]}" + return ref + + +def render_segment_card(seg: SegmentInfo, idx: int, audio_int16: np.ndarray = None, sample_rate: int = 0, render_key: str = "", segment_dir: Path = None, audio_preload: str = "metadata", audio_inline: bool = False) -> str: + """Render a single segment as an HTML card with optional audio player. 
+ + Args: + seg: Segment info + idx: Segment index + audio_int16: Full audio as int16 array for writing per-segment WAV files + sample_rate: Audio sample rate in Hz + render_key: Unique key to prevent browser caching between renders + segment_dir: Directory to write segment WAV files into + """ + confidence_class = get_confidence_class(seg.match_score) + confidence_badge_class = confidence_class # preserve original for badge color + if seg.has_missing_words: + confidence_class = "segment-low" + if seg.potentially_undersegmented and confidence_class != "segment-low": + confidence_class = "segment-underseg" + + timestamp = f"{format_timestamp(seg.start_time)} - {format_timestamp(seg.end_time)}" + duration = seg.end_time - seg.start_time + + # Format reference (simplify same-verse refs) + ref_display = simplify_ref(seg.matched_ref) if seg.matched_ref else "" + + # Confidence percentage with label + confidence_pct = f"Confidence: {seg.match_score:.0%}" + + # Undersegmented badge + underseg_badge = "" + if seg.potentially_undersegmented: + underseg_badge = '
Potentially Undersegmented
' + + # Missing words badge + missing_badge = "" + if seg.has_missing_words: + missing_badge = '
Missing Words
' + + # Error display + error_html = "" + if seg.error: + error_html = f'
{seg.error}
' + + # Audio player HTML — each segment gets its own WAV file served by Gradio. + audio_html = "" + if audio_int16 is not None and sample_rate > 0 and segment_dir is not None: + audio_src = encode_segment_audio(audio_int16, sample_rate, seg.start_time, seg.end_time, segment_dir, idx, inline=audio_inline) + # Add animate button only if segment has matched_ref (Quran text with word spans) + animate_btn = "" + if seg.matched_ref: + animate_btn = f'' + audio_html = f''' +
+ + + {animate_btn} +
+ ''' + + # Build matched text with verse markers at all verse boundaries + BASMALA_TEXT = "بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيم" + ISTIATHA_TEXT = "أَعُوذُ بِٱللَّهِ مِنَ الشَّيْطَانِ الرَّجِيم" + COMBINED_PREFIX = ISTIATHA_TEXT + " ۝ " + BASMALA_TEXT + _SPECIAL_PREFIXES = [COMBINED_PREFIX, ISTIATHA_TEXT, BASMALA_TEXT] + + # Helper to wrap words in spans + def wrap_words_in_spans(text): + return " ".join(_wrap_word_with_chars(w) for w in text.split()) + + if seg.matched_ref: + # Generate text with markers from the index + text_html = get_text_with_markers(seg.matched_ref) + if text_html and seg.matched_text: + # Check for any special prefix (fused or forward-merged) + for _sp_name, _sp in [("Isti'adha+Basmala", COMBINED_PREFIX), + ("Isti'adha", ISTIATHA_TEXT), + ("Basmala", BASMALA_TEXT)]: + if seg.matched_text.startswith(_sp): + mfa_prefix = f"{_sp_name}+{seg.matched_ref}" + words = _sp.replace(" ۝ ", " ").split() + prefix_html = " ".join( + _wrap_word_with_chars(w, pos=f"{mfa_prefix}:0:0:{i+1}") + for i, w in enumerate(words) + ) + text_html = prefix_html + " " + text_html + break + elif not text_html: + # Special ref (Basmala/Isti'adha): wrap words with indexed data-pos + # so MFA timestamps can be injected later + if seg.matched_ref and seg.matched_text: + words = seg.matched_text.split() + text_html = " ".join( + _wrap_word_with_chars(w, pos=f"{seg.matched_ref}:0:0:{i+1}") + for i, w in enumerate(words) + ) + else: + text_html = seg.matched_text or "" + elif seg.matched_text: + # Special segments (Basmala/Isti'adha) have text but no ref + text_html = wrap_words_in_spans(seg.matched_text) + else: + text_html = "" + + confidence_badge = "" if seg.has_missing_words else f'
{confidence_pct}
' + + # Build inline header: Segment N | ref | duration | time range + header_parts = [f"Segment {idx + 1}"] + if ref_display: + header_parts.append(ref_display) + header_parts.append(f"{duration:.1f}s") + header_parts.append(timestamp) + header_text = " | ".join(header_parts) + + html = f''' +
+
+
{header_text}
+
+ {underseg_badge} + {confidence_badge} + {missing_badge} +
+
+ + {audio_html} + +
+ {text_html} +
+ + {error_html} +
+ ''' + return html + + +def render_segments(segments: list, audio_int16: np.ndarray = None, sample_rate: int = 0, cpu_fallback: bool = False, segment_dir: Path = None) -> str: + """Render all segments as HTML with optional audio players. + + Args: + segments: List of SegmentInfo objects + audio_int16: Full audio as int16 array for writing per-segment WAV files + sample_rate: Audio sample rate in Hz + cpu_fallback: If True, show warning that GPU quota was exhausted + segment_dir: Directory containing per-segment WAV files + """ + import time + import wave + + if not segments: + return '
No segments detected
' + + # Generate unique key for this render to prevent audio caching + render_key = str(int(time.time() * 1000)) + + # Write full audio file for unified megacard playback + full_audio_url = "" + if audio_int16 is not None and sample_rate > 0 and segment_dir: + full_path = segment_dir / "full.wav" + with wave.open(str(full_path), 'wb') as wf: + wf.setnchannels(1) + wf.setsampwidth(2) + wf.setframerate(sample_rate) + wf.writeframes(audio_int16.tobytes()) + full_audio_url = f"/gradio_api/file={full_path}" + + # Categorize segments by confidence level (1-indexed for display) + med_segments = [i + 1 for i, s in enumerate(segments) if CONFIDENCE_MED <= s.match_score < CONFIDENCE_HIGH] + low_segments = [i + 1 for i, s in enumerate(segments) if s.match_score < CONFIDENCE_MED] + + # Build header with confidence summary + header_parts = [] + + # GPU quota warning banner + if cpu_fallback: + reset_time = get_quota_reset_time() + reset_msg = f' Resets in {reset_time}.' if reset_time else '' + header_parts.append( + '
' + f'Daily GPU quota reached. Processing on CPU (slower performance).{reset_msg}' + '
' + ) + + header_parts.append(f'
Found {len(segments)} segments
') + + # Combined review summary: merge medium and low confidence segments into one color-coded list + low_set = set(low_segments) + all_review = sorted(set(med_segments) | low_set) + if all_review: + def _span(n: int) -> str: + css = "segment-low-text" if n in low_set else "segment-med-text" + return f'{n}' + + if len(all_review) <= REVIEW_SUMMARY_MAX_SEGMENTS: + seg_html = ", ".join(_span(n) for n in all_review) + else: + seg_html = ", ".join(_span(n) for n in all_review[:REVIEW_SUMMARY_MAX_SEGMENTS]) + remaining = len(all_review) - REVIEW_SUMMARY_MAX_SEGMENTS + seg_html += f" ... and {remaining} more" + + header_parts.append( + f'
' + f'Needs review: {len(all_review)} (segments {seg_html})' + f'
' + ) + + missing_segments = [i + 1 for i, s in enumerate(segments) if s.has_missing_words] + if missing_segments: + # Group consecutive segment numbers into pairs (gaps always flag both neighbors) + missing_pairs = [] + i = 0 + while i < len(missing_segments): + if i + 1 < len(missing_segments) and missing_segments[i + 1] == missing_segments[i] + 1: + missing_pairs.append(f"{missing_segments[i]}/{missing_segments[i + 1]}") + i += 2 + else: + missing_pairs.append(str(missing_segments[i])) + i += 1 + + if len(missing_pairs) <= REVIEW_SUMMARY_MAX_SEGMENTS: + pairs_display = ", ".join(missing_pairs) + else: + pairs_display = ", ".join(missing_pairs[:REVIEW_SUMMARY_MAX_SEGMENTS]) + remaining = len(missing_pairs) - REVIEW_SUMMARY_MAX_SEGMENTS + pairs_display += f" ... and {remaining} more" + + header_parts.append( + f'
' + f'Segments with missing words: {len(missing_pairs)} (segments {pairs_display})' + f'
' + ) + + underseg_segments = [i + 1 for i, s in enumerate(segments) if s.potentially_undersegmented] + if underseg_segments: + if len(underseg_segments) <= REVIEW_SUMMARY_MAX_SEGMENTS: + underseg_display = ", ".join(str(n) for n in underseg_segments) + else: + underseg_display = ", ".join(str(n) for n in underseg_segments[:REVIEW_SUMMARY_MAX_SEGMENTS]) + remaining = len(underseg_segments) - REVIEW_SUMMARY_MAX_SEGMENTS + underseg_display += f" ... and {remaining} more" + + header_parts.append( + f'
' + f'Potentially undersegmented: {len(underseg_segments)} (segments {underseg_display})' + f'
' + ) + + html_parts = [ + f'
', + "\n".join(header_parts), + ] + + for idx, seg in enumerate(segments): + inline = idx < AUDIO_PRELOAD_COUNT + preload = "auto" if inline else "metadata" + html_parts.append(render_segment_card(seg, idx, audio_int16, sample_rate, render_key, segment_dir, audio_preload=preload, audio_inline=inline)) + + html_parts.append('
# NOTE(review): the closing statements of render_segments() appeared at this
# flattening boundary (an append of a corrupted literal plus the final
# "\n".join(html_parts) return); they are not reproduced here.


# =============================================================================
# Main processing
# =============================================================================

def encode_segment_audio(
    audio_int16: np.ndarray, sample_rate: int,
    start_time: float, end_time: float,
    segment_dir: Path, segment_idx: int,
    inline: bool = False,
) -> str:
    """Write a segment's audio slice as a WAV file and return a src URL.

    Args:
        audio_int16: Full audio already converted to int16 (avoids a
            per-segment conversion).
        sample_rate: Sample rate in Hz.
        start_time: Segment start in seconds.
        end_time: Segment end in seconds.
        segment_dir: Directory to write the WAV file into.
        segment_idx: Segment index (used for filename).
        inline: If True, return a base64 data URI instead of a file URL.

    Returns:
        A ``data:`` URI (inline) or ``/gradio_api/file=`` URL.
    """
    import wave
    import io

    clip = audio_int16[int(start_time * sample_rate):int(end_time * sample_rate)]

    # The WAV is always written to disk — MFA timestamp computation needs the
    # file even when the player gets an inline data URI.
    path = segment_dir / f"seg_{segment_idx}.wav"
    with wave.open(str(path), 'wb') as wf:
        wf.setnchannels(1)      # mono
        wf.setsampwidth(2)      # 16-bit samples
        wf.setframerate(sample_rate)
        wf.writeframes(clip.tobytes())

    if not inline:
        return f"/gradio_api/file={path}"

    import base64
    encoded = base64.b64encode(path.read_bytes()).decode('ascii')
    return f"data:audio/wav;base64,{encoded}"


# NOTE(review): is_end_of_verse() began here in the flattened source but was
# cut mid-docstring at this boundary; its head is not reproduced here.
+ """ + if not matched_ref or ":" not in matched_ref: + return False + + try: + # Take the end part of the range (or the single ref) + end_ref = matched_ref.split("-")[-1] + parts = end_ref.split(":") + if len(parts) < 3: + return False + + surah = int(parts[0]) + ayah = int(parts[1]) + word = int(parts[2]) + + verse_word_counts = _load_verse_word_counts() + if surah not in verse_word_counts: + return False + + num_words = verse_word_counts[surah].get(ayah, 0) + return word >= num_words + except Exception as e: + print(f"Error checking end of verse: {e}") + + return False + + +def _run_post_vad_pipeline( + audio, sample_rate, intervals, + model_name, device, profiling, pipeline_start, progress_steps, + progress=gr.Progress(), + precomputed_asr=None, + min_silence_ms=0, min_speech_ms=0, pad_ms=0, + request=None, log_row=None, +): + """Shared pipeline after VAD: ASR → specials → anchor → matching → results. + + Args: + audio: Preprocessed float32 mono 16kHz audio array + sample_rate: Sample rate (16000) + intervals: List of (start, end) tuples from VAD cleaning + model_name: ASR model name ("Base" or "Large") + device: Device string ("gpu" or "cpu") + profiling: ProfilingData instance to populate + pipeline_start: time.time() when pipeline started + precomputed_asr: Optional tuple of (results, batch_profiling, sorting_time, + batch_build_time, gpu_move_time, gpu_time) from a combined GPU lease. + If provided, skips the standalone ASR GPU call. + + Returns: + (html, json_output, segment_dir) tuple + """ + import time + + if not intervals: + return "
No speech segments detected in audio
", {"segments": []}, None + + # Build VAD segments and extract audio arrays + vad_segments = [] + segment_audios = [] + for idx, (start, end) in enumerate(intervals): + vad_segments.append(VadSegment(start_time=start, end_time=end, segment_idx=idx)) + start_sample = int(start * sample_rate) + end_sample = int(end * sample_rate) + segment_audios.append(audio[start_sample:end_sample]) + + print(f"[VAD] {len(vad_segments)} segments") + + if precomputed_asr is not None: + # ASR already ran within the combined GPU lease + phoneme_texts, asr_batch_profiling, asr_sorting_time, asr_batch_build_time, asr_gpu_move_time, asr_gpu_time = precomputed_asr + print(f"[PHONEME ASR] {len(phoneme_texts)} results (precomputed, gpu {asr_gpu_time:.2f}s)") + else: + # Standalone ASR GPU lease (resegment/retranscribe paths) + progress(*progress_steps["asr"]) + print(f"[STAGE] Running ASR...") + + phoneme_asr_start = time.time() + phoneme_texts, asr_batch_profiling, asr_sorting_time, asr_batch_build_time, asr_gpu_move_time, asr_gpu_time = run_phoneme_asr_gpu(segment_audios, sample_rate, model_name) + phoneme_asr_time = time.time() - phoneme_asr_start + profiling.asr_time = phoneme_asr_time + profiling.asr_gpu_time = asr_gpu_time + profiling.asr_model_move_time = asr_gpu_move_time + profiling.asr_sorting_time = asr_sorting_time + profiling.asr_batch_build_time = asr_batch_build_time + profiling.asr_batch_profiling = asr_batch_profiling + print(f"[PHONEME ASR] {len(phoneme_texts)} results in {phoneme_asr_time:.2f}s (gpu {asr_gpu_time:.2f}s)") + + if asr_batch_profiling: + for b in asr_batch_profiling: + print(f" Batch {b['batch_num']:>2}: {b['size']:>3} segs | " + f"{b['time']:.3f}s | " + f"{b['min_dur']:.2f}-{b['max_dur']:.2f}s " + f"(A {b['avg_dur']:.2f}s, T {b['total_seconds']:.1f}s, W {b['pad_waste']:.0%})") + + # Phoneme-based special segment detection + progress(*progress_steps["special_segments"]) + print(f"[STAGE] Detecting special segments...") + from src.alignment.special_segments 
import detect_special_segments + vad_segments, segment_audios, special_results, first_quran_idx = detect_special_segments( + phoneme_texts, vad_segments, segment_audios + ) + + # If segments were split (combined Isti'adha+Basmala), pad phoneme_texts + # with empty placeholders so indices stay aligned. + if len(vad_segments) != len(phoneme_texts): + phoneme_texts = [[], []] + phoneme_texts[1:] + + # Anchor detection via phoneme n-gram voting + progress(*progress_steps["anchor"]) + print(f"[STAGE] Anchor detection...") + anchor_start = time.time() + from src.alignment.phoneme_anchor import find_anchor_by_voting, verse_to_word_index + from src.alignment.ngram_index import get_ngram_index + from src.alignment.phoneme_matcher_cache import get_chapter_reference + + surah, ayah = find_anchor_by_voting( + phoneme_texts[first_quran_idx:], + get_ngram_index(), + ANCHOR_SEGMENTS, + ) + + if surah == 0: + raise ValueError("Could not anchor to any chapter - no n-gram matches found") + + profiling.anchor_time = time.time() - anchor_start + print(f"[ANCHOR] Anchored to Surah {surah}, Ayah {ayah}") + + # Build chapter reference and set pointer + chapter_ref = get_chapter_reference(surah) + pointer = verse_to_word_index(chapter_ref, ayah) + + progress(*progress_steps["matching"]) + print(f"[STAGE] Text Matching...") + + # Phoneme-based DP alignment + match_start = time.time() + match_results, match_profiling, gap_segments = run_phoneme_matching( + phoneme_texts, + surah, + first_quran_idx, + special_results, + start_pointer=pointer, + ) + match_time = time.time() - match_start + profiling.match_wall_time = match_time + print(f"[MATCH] {len(match_results)} phoneme alignments in {match_time:.2f}s") + + # Populate phoneme alignment profiling (if enabled) + if PHONEME_ALIGNMENT_PROFILING: + profiling.phoneme_total_time = match_profiling.get("total_time", 0.0) + profiling.phoneme_ref_build_time = match_profiling.get("ref_build_time", 0.0) + profiling.phoneme_dp_total_time = 
match_profiling.get("dp_total_time", 0.0) + profiling.phoneme_dp_min_time = match_profiling.get("dp_min_time", 0.0) + profiling.phoneme_dp_max_time = match_profiling.get("dp_max_time", 0.0) + profiling.phoneme_window_setup_time = match_profiling.get("window_setup_time", 0.0) + profiling.phoneme_result_build_time = match_profiling.get("result_build_time", 0.0) + profiling.phoneme_num_segments = match_profiling.get("num_segments", 0) + + # Retry / reanchor counters (always available) + profiling.tier1_attempts = match_profiling.get("tier1_attempts", 0) + profiling.tier1_passed = match_profiling.get("tier1_passed", 0) + profiling.tier1_segments = match_profiling.get("tier1_segments", []) + profiling.tier2_attempts = match_profiling.get("tier2_attempts", 0) + profiling.tier2_passed = match_profiling.get("tier2_passed", 0) + profiling.tier2_segments = match_profiling.get("tier2_segments", []) + profiling.consec_reanchors = match_profiling.get("consec_reanchors", 0) + profiling.special_merges = match_profiling.get("special_merges", 0) + profiling.segments_attempted = match_profiling.get("segments_attempted", 0) + profiling.segments_passed = match_profiling.get("segments_passed", 0) + + progress(*progress_steps["building"]) + print(f"[STAGE] Building results...") + + # Build SegmentInfo list + segments = [] + result_build_start = time.time() + + # Convert full audio to int16 once + t_wav = time.time() + audio_int16 = (audio * 32767).astype(np.int16) + audio_encode_time = time.time() - t_wav + + # Create a per-request directory for segment WAV files + import uuid + segment_dir = SEGMENT_AUDIO_DIR / uuid.uuid4().hex + segment_dir.mkdir(parents=True, exist_ok=True) + + last_display_idx = len(vad_segments) - 1 + + # Tracking lists for segment stats logging + _seg_word_counts: list[int] = [] + _seg_durations: list[float] = [] + _seg_phoneme_counts: list[int] = [] + _seg_ayah_spans: list[int] = [] + _underseg_indices: list[int] = [] + _underseg_by_words: list[int] = [] + 
_underseg_by_ayah: list[int] = [] + + for idx, (seg, (matched_text, score, matched_ref)) in enumerate( + zip(vad_segments, match_results) + ): + if idx == last_display_idx and matched_ref: + if not is_end_of_verse(matched_ref): + score = max(0.0, score - 0.25) + + error = None + phoneme_text = " ".join(phoneme_texts[idx]) if idx < len(phoneme_texts) else "" + + if score <= 0.0: + matched_text = "" + matched_ref = "" + error = f"Low confidence ({score:.0%})" + + duration = seg.end_time - seg.start_time + word_count, ayah_span = get_segment_word_stats(matched_ref) + underseg = check_undersegmented(matched_ref, duration) + + segments.append(SegmentInfo( + start_time=seg.start_time, + end_time=seg.end_time, + transcribed_text=phoneme_text, + matched_text=matched_text, + matched_ref=matched_ref, + match_score=score, + error=error, + has_missing_words=idx in gap_segments, + potentially_undersegmented=underseg, + )) + + # Track per-segment stats for logging + _seg_word_counts.append(word_count) + _seg_durations.append(duration) + _seg_phoneme_counts.append(len(phoneme_texts[idx]) if idx < len(phoneme_texts) else 0) + _seg_ayah_spans.append(ayah_span) + if underseg: + _underseg_indices.append(idx + 1) + if word_count >= UNDERSEG_MIN_WORDS: + _underseg_by_words.append(idx + 1) + if ayah_span >= UNDERSEG_MIN_AYAH_SPAN: + _underseg_by_ayah.append(idx + 1) + + # Recompute from actual output + profiling.segments_attempted = len(segments) + profiling.segments_passed = sum(1 for s in segments if s.match_score > 0.0) + + result_build_total_time = time.time() - result_build_start + profiling.result_build_time = result_build_total_time + profiling.result_audio_encode_time = audio_encode_time + + progress(*progress_steps["done"]) + print("[STAGE] Done!") + + # Print profiling summary + profiling.total_time = time.time() - pipeline_start + print(profiling.summary()) + + # Segment distribution stats + matched_words = [w for w in _seg_word_counts if w > 0] + matched_durs = [d for i, d 
in enumerate(_seg_durations) if _seg_word_counts[i] > 0] + matched_phonemes = [p for i, p in enumerate(_seg_phoneme_counts) if _seg_word_counts[i] > 0] + pauses = [vad_segments[i + 1].start_time - vad_segments[i].end_time + for i in range(len(vad_segments) - 1)] + pauses = [p for p in pauses if p > 0] + if matched_words: + def _std(vals): + n = len(vals) + if n < 2: + return 0.0 + mean = sum(vals) / n + return (sum((v - mean) ** 2 for v in vals) / n) ** 0.5 + + avg_w = sum(matched_words) / len(matched_words) + std_w = _std(matched_words) + min_w, max_w = min(matched_words), max(matched_words) + avg_d = sum(matched_durs) / len(matched_durs) + std_d = _std(matched_durs) + min_d, max_d = min(matched_durs), max(matched_durs) + total_speech_sec = sum(matched_durs) + total_words = sum(matched_words) + total_phonemes = sum(matched_phonemes) + wpm = total_words / (total_speech_sec / 60) if total_speech_sec > 0 else 0 + pps = total_phonemes / total_speech_sec if total_speech_sec > 0 else 0 + print(f"\n[SEGMENT STATS] {len(segments)} total segments, {len(matched_words)} matched") + print(f" Words/segment : min={min_w}, max={max_w}, avg={avg_w:.1f}\u00b1{std_w:.1f}") + print(f" Duration (s) : min={min_d:.1f}, max={max_d:.1f}, avg={avg_d:.1f}\u00b1{std_d:.1f}") + if pauses: + avg_p = sum(pauses) / len(pauses) + std_p = _std(pauses) + print(f" Pause (s) : min={min(pauses):.1f}, max={max(pauses):.1f}, avg={avg_p:.1f}\u00b1{std_p:.1f}") + print(f" Speech pace : {wpm:.1f} words/min, {pps:.1f} phonemes/sec (speech time only)") + if _underseg_indices: + print(f" Undersegmented: {len(_underseg_indices)} (segments {', '.join(str(n) for n in _underseg_indices)})") + if _underseg_by_words: + print(f" by word count (>={UNDERSEG_MIN_WORDS}): {', '.join(str(n) for n in _underseg_by_words)}") + if _underseg_by_ayah: + print(f" by ayah span (>={UNDERSEG_MIN_AYAH_SPAN}): {', '.join(str(n) for n in _underseg_by_ayah)}") + else: + print(f" Undersegmented: 0") + + # --- Usage logging --- + try: 
+ from utils.usage_logger import log_alignment, update_alignment_row + + # Reciter stats (default 0.0 when no matched segments) + _log_wpm = wpm if matched_words else 0.0 + _log_pps = pps if matched_words else 0.0 + _log_avg_d = avg_d if matched_words else 0.0 + _log_std_d = std_d if matched_words else 0.0 + _log_avg_p = avg_p if (matched_words and pauses) else 0.0 + _log_std_p = std_p if (matched_words and pauses) else 0.0 + + # Mean confidence across all segments + all_scores = [seg.match_score for seg in segments] + _log_mean_conf = sum(all_scores) / len(all_scores) if all_scores else 0.0 + + # Build per-segment objects for logging + _log_segments = [] + for i, seg in enumerate(segments): + sp_type = None + if i < len(special_results) and special_results[i]: + sp_type = special_results[i] + _log_segments.append({ + "idx": i + 1, + "start": round(seg.start_time, 2), + "end": round(seg.end_time, 2), + "duration": round(seg.end_time - seg.start_time, 2), + "ref": seg.matched_ref or "", + "confidence": round(seg.match_score, 2), + "word_count": _seg_word_counts[i] if i < len(_seg_word_counts) else 0, + "ayah_span": _seg_ayah_spans[i] if i < len(_seg_ayah_spans) else 0, + "phoneme_count": _seg_phoneme_counts[i] if i < len(_seg_phoneme_counts) else 0, + "undersegmented": seg.potentially_undersegmented, + "missing_words": seg.has_missing_words, + "special_type": sp_type, + "error": seg.error, + }) + + _r = lambda v: round(v, 2) + _log_kwargs = dict( + audio_duration_s=_r(len(audio) / sample_rate), + num_segments=len(segments), + surah=surah, + min_silence_ms=min_silence_ms, + min_speech_ms=min_speech_ms, + pad_ms=pad_ms, + asr_model=model_name, + device=device, + total_time=_r(profiling.total_time), + vad_queue_time=_r(getattr(profiling, "vad_wall_time", 0.0) - getattr(profiling, "vad_gpu_time", 0.0)), + vad_gpu_time=_r(getattr(profiling, "vad_gpu_time", 0.0)), + asr_gpu_time=_r(getattr(profiling, "asr_gpu_time", 0.0)), + dp_total_time=_r(getattr(profiling, 
"phoneme_dp_total_time", 0.0)), + segments_passed=getattr(profiling, "segments_passed", 0), + segments_failed=getattr(profiling, "segments_attempted", 0) - getattr(profiling, "segments_passed", 0), + mean_confidence=_r(_log_mean_conf), + tier1_retries=getattr(profiling, "tier1_attempts", 0), + tier1_passed=getattr(profiling, "tier1_passed", 0), + tier2_retries=getattr(profiling, "tier2_attempts", 0), + tier2_passed=getattr(profiling, "tier2_passed", 0), + reanchors=getattr(profiling, "consec_reanchors", 0), + special_merges=getattr(profiling, "special_merges", 0), + words_per_minute=_r(_log_wpm), + phonemes_per_second=_r(_log_pps), + avg_segment_duration=_r(_log_avg_d), + std_segment_duration=_r(_log_std_d), + avg_pause_duration=_r(_log_avg_p), + std_pause_duration=_r(_log_std_p), + log_segments=_log_segments, + ) + + if log_row is not None: + # Resegment / retranscribe: mutate existing row in-place + _action = "retranscribe" if log_row.get("asr_model") != model_name else "resegment" + update_alignment_row(log_row, action=_action, **_log_kwargs) + else: + # Initial run: create new row + log_row = log_alignment( + audio=audio, + sample_rate=sample_rate, + request=request, + **_log_kwargs, + ) + except Exception as e: + print(f"[USAGE_LOG] Failed: {e}") + + # Build JSON output for API consumers + def parse_ref(matched_ref): + if not matched_ref: + return "", "" + if "-" in matched_ref: + parts = matched_ref.split("-") + return parts[0], parts[1] if len(parts) > 1 else parts[0] + return matched_ref, matched_ref + + segments_list = [] + for i, seg in enumerate(segments): + segment_data = { + "segment": i + 1, + "time_from": round(seg.start_time, 3), + "time_to": round(seg.end_time, 3), + "ref_from": parse_ref(seg.matched_ref)[0], + "ref_to": parse_ref(seg.matched_ref)[1], + "matched_text": seg.matched_text or "", + "confidence": round(seg.match_score, 3), + "potentially_undersegmented": seg.potentially_undersegmented, + "error": seg.error + } + 
def process_audio(
    audio_data,
    min_silence_ms,
    min_speech_ms,
    pad_ms,
    model_name="Base",
    device="GPU",
    request: gr.Request = None,
    progress=gr.Progress(),
):
    """Process uploaded audio and extract segments with automatic verse detection.

    Runs the full pipeline: normalize audio -> single GPU lease for VAD + ASR
    -> post-VAD alignment pipeline (anchoring, matching, rendering).

    Args:
        audio_data: (sample_rate, np.ndarray) tuple from gr.Audio, or None.
        min_silence_ms / min_speech_ms / pad_ms: VAD cleaning parameters.
        model_name: ASR model label ("Base" or "Large").
        device: "GPU" or "CPU" (case-insensitive; normalized below).
        request: Gradio request object, forwarded for usage logging.
        progress: Gradio progress reporter.

    Returns:
        (html, json_output, raw_speech_intervals, raw_is_complete,
         preprocessed_audio, sample_rate, intervals, segment_dir, log_row)
    """
    # NOTE(review): the early-return status message was originally wrapped in
    # HTML markup that was stripped during extraction — restore the wrapper.
    if audio_data is None:
        return "Please upload an audio file", None, None, None, None, None, None, None, None

    # Normalize device label to lowercase for downstream checks
    device = device.lower()

    # Reset per-request so each request retries GPU fresh
    from src.zero_gpu import reset_quota_flag, force_cpu_mode
    reset_quota_flag()

    if device == "cpu":
        force_cpu_mode()

    print(f"\n{'='*60}")
    print("Processing audio with automatic verse detection")
    print(f"Settings: silence={min_silence_ms}ms, speech={min_speech_ms}ms, pad={pad_ms}ms, device={device}")
    print(f"{'='*60}")

    # Initialize profiling data (uses module-level `time`; the previous
    # function-local `import time` shadowed the top-of-file import)
    profiling = ProfilingData()
    pipeline_start = time.time()

    sample_rate, audio = audio_data

    # Convert integer PCM to float32 in [-1, 1)
    if audio.dtype == np.int16:
        audio = audio.astype(np.float32) / 32768.0
    elif audio.dtype == np.int32:
        audio = audio.astype(np.float32) / 2147483648.0

    # Convert stereo to mono by averaging channels
    if audio.ndim > 1:
        audio = audio.mean(axis=1)

    # Resample to 16kHz once (both VAD and ASR models require 16kHz)
    if sample_rate != 16000:
        resample_start = time.time()
        audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=16000, res_type=RESAMPLE_TYPE)
        profiling.resample_time = time.time() - resample_start
        print(f"[PROFILE] Resampling {sample_rate}Hz -> 16000Hz took {profiling.resample_time:.3f}s (audio length: {len(audio)/16000:.1f}s, res_type={RESAMPLE_TYPE})")
        sample_rate = 16000

    progress(*PROGRESS_PROCESS_AUDIO["vad_asr"])
    print("[STAGE] Running VAD + ASR...")

    # Single GPU lease: VAD + ASR
    gpu_start = time.time()
    (intervals, vad_profiling, vad_gpu_time, raw_speech_intervals, raw_is_complete,
     asr_results, asr_batch_profiling, asr_sorting_time, asr_batch_build_time,
     asr_gpu_move_time, asr_gpu_time) = run_vad_and_asr_gpu(
        audio, sample_rate, int(min_silence_ms), int(min_speech_ms), int(pad_ms), model_name
    )
    wall_time = time.time() - gpu_start

    # VAD profiling: queue wait is attributed to VAD (it happens before VAD runs)
    profiling.vad_model_load_time = vad_profiling.get("model_load_time", 0.0)
    profiling.vad_model_move_time = vad_profiling.get("model_move_time", 0.0)
    profiling.vad_inference_time = vad_profiling.get("inference_time", 0.0)
    profiling.vad_gpu_time = vad_gpu_time
    profiling.vad_wall_time = wall_time - asr_gpu_time
    print(f"[GPU] VAD completed in {profiling.vad_wall_time:.2f}s (gpu {vad_gpu_time:.2f}s)")

    # NOTE(review): message originally carried HTML markup lost in extraction.
    if not intervals:
        return "No speech segments detected in audio", None, None, None, None, None, None, None, None

    # ASR profiling: no separate queue (ran within same lease)
    profiling.asr_time = asr_gpu_time
    profiling.asr_gpu_time = asr_gpu_time
    profiling.asr_model_move_time = asr_gpu_move_time
    profiling.asr_sorting_time = asr_sorting_time
    profiling.asr_batch_build_time = asr_batch_build_time
    profiling.asr_batch_profiling = asr_batch_profiling
    print(f"[GPU] ASR completed in {asr_gpu_time:.2f}s")

    # Run post-VAD pipeline (ASR already done, pass results)
    html, json_output, seg_dir, log_row = _run_post_vad_pipeline(
        audio, sample_rate, intervals,
        model_name, device, profiling, pipeline_start, PROGRESS_PROCESS_AUDIO,
        progress=progress,
        precomputed_asr=(asr_results, asr_batch_profiling, asr_sorting_time, asr_batch_build_time, asr_gpu_move_time, asr_gpu_time),
        min_silence_ms=min_silence_ms, min_speech_ms=min_speech_ms, pad_ms=pad_ms,
        request=request,
    )

    return html, json_output, raw_speech_intervals, raw_is_complete, audio, sample_rate, intervals, seg_dir, log_row
", None, None, None, None, None, None, None, None + + # ASR profiling: no separate queue (ran within same lease) + profiling.asr_time = asr_gpu_time + profiling.asr_gpu_time = asr_gpu_time + profiling.asr_model_move_time = asr_gpu_move_time + profiling.asr_sorting_time = asr_sorting_time + profiling.asr_batch_build_time = asr_batch_build_time + profiling.asr_batch_profiling = asr_batch_profiling + print(f"[GPU] ASR completed in {asr_gpu_time:.2f}s") + + # Run post-VAD pipeline (ASR already done, pass results) + html, json_output, seg_dir, log_row = _run_post_vad_pipeline( + audio, sample_rate, intervals, + model_name, device, profiling, pipeline_start, PROGRESS_PROCESS_AUDIO, + progress=progress, + precomputed_asr=(asr_results, asr_batch_profiling, asr_sorting_time, asr_batch_build_time, asr_gpu_move_time, asr_gpu_time), + min_silence_ms=min_silence_ms, min_speech_ms=min_speech_ms, pad_ms=pad_ms, + request=request, + ) + + return html, json_output, raw_speech_intervals, raw_is_complete, audio, sample_rate, intervals, seg_dir, log_row + + +def resegment_audio( + cached_speech_intervals, cached_is_complete, + cached_audio, cached_sample_rate, + min_silence_ms, min_speech_ms, pad_ms, + model_name="Base", device="GPU", + cached_log_row=None, + request: gr.Request = None, + progress=gr.Progress(), +): + """Re-run segmentation with different settings using cached VAD data. + + Skips the heavy VAD model inference — only re-cleans speech intervals + and re-runs ASR + downstream pipeline. + + Returns: + (html, json_output, cached_speech_intervals, cached_is_complete, cached_audio, cached_sample_rate, intervals, segment_dir, log_row) + """ + import time + + if cached_speech_intervals is None or cached_audio is None: + return "
No cached data. Please run Extract Segments first.
", None, None, None, None, None, None, None, None + + # Normalize device label + device = device.lower() + + from src.zero_gpu import reset_quota_flag, force_cpu_mode + reset_quota_flag() + if device == "cpu": + force_cpu_mode() + + print(f"\n{'='*60}") + print(f"RESEGMENTING with different settings") + print(f"Settings: silence={min_silence_ms}ms, speech={min_speech_ms}ms, pad={pad_ms}ms") + print(f"{'='*60}") + + profiling = ProfilingData() + pipeline_start = time.time() + + progress(*PROGRESS_RESEGMENT["resegment"]) + print("[STAGE] Resegmenting...") + + # Re-clean speech intervals with new parameters (CPU, no GPU needed) + from recitations_segmenter import clean_speech_intervals + clean_out = clean_speech_intervals( + cached_speech_intervals, + cached_is_complete, + min_silence_duration_ms=int(min_silence_ms), + min_speech_duration_ms=int(min_speech_ms), + pad_duration_ms=int(pad_ms), + return_seconds=True, + ) + + intervals = clean_out.clean_speech_intervals.tolist() + intervals = [(start, end) for start, end in intervals] + + raw_count = len(cached_speech_intervals) + final_count = len(intervals) + removed = raw_count - final_count + print(f"[RESEGMENT] Raw intervals: {raw_count}, after cleaning: {final_count} " + f"({removed} removed by silence merge + min_speech={min_speech_ms}ms filter)") + + if not intervals: + return "
No speech segments detected with these settings
", None, cached_speech_intervals, cached_is_complete, cached_audio, cached_sample_rate, None, None, cached_log_row + + # Run post-VAD pipeline + html, json_output, seg_dir, log_row = _run_post_vad_pipeline( + cached_audio, cached_sample_rate, intervals, + model_name, device, profiling, pipeline_start, PROGRESS_RESEGMENT, + progress=progress, + min_silence_ms=min_silence_ms, min_speech_ms=min_speech_ms, pad_ms=pad_ms, + request=request, log_row=cached_log_row, + ) + + # Pass through cached state unchanged, but update intervals + return html, json_output, cached_speech_intervals, cached_is_complete, cached_audio, cached_sample_rate, intervals, seg_dir, log_row + + +def retranscribe_audio( + cached_intervals, + cached_audio, cached_sample_rate, + cached_speech_intervals, cached_is_complete, + model_name, + device="GPU", + cached_log_row=None, + min_silence_ms=0, min_speech_ms=0, pad_ms=0, + request: gr.Request = None, + progress=gr.Progress(), +): + """Re-run ASR + downstream with a different model using cached intervals. + + Uses the same segment boundaries but a different ASR model. + + Returns: + (html, json_output, cached_speech_intervals, cached_is_complete, + cached_audio, cached_sample_rate, cached_intervals, segment_dir, log_row) + """ + import time + + if cached_intervals is None or cached_audio is None: + return "
No cached data. Please run Extract Segments first.
", None, None, None, None, None, None, None, None + + device = device.lower() + + from src.zero_gpu import reset_quota_flag, force_cpu_mode + reset_quota_flag() + if device == "cpu": + force_cpu_mode() + + print(f"\n{'='*60}") + print(f"RETRANSCRIBING with {model_name} model") + print(f"{'='*60}") + + profiling = ProfilingData() + pipeline_start = time.time() + + pct, desc = PROGRESS_RETRANSCRIBE["retranscribe"] + progress(pct, desc=desc.format(model=model_name)) + print(f"[STAGE] Retranscribing with {model_name} model...") + + html, json_output, seg_dir, log_row = _run_post_vad_pipeline( + cached_audio, cached_sample_rate, cached_intervals, + model_name, device, profiling, pipeline_start, PROGRESS_RETRANSCRIBE, + progress=progress, + min_silence_ms=min_silence_ms, min_speech_ms=min_speech_ms, pad_ms=pad_ms, + request=request, log_row=cached_log_row, + ) + + # Pass through all cached state unchanged + return html, json_output, cached_speech_intervals, cached_is_complete, cached_audio, cached_sample_rate, cached_intervals, seg_dir, log_row + + +def _retranscribe_wrapper( + cached_intervals, cached_audio, cached_sample_rate, + cached_speech_intervals, cached_is_complete, + cached_model_name, device, + cached_log_row=None, + min_silence_ms=0, min_speech_ms=0, pad_ms=0, + request: gr.Request = None, + progress=gr.Progress(), +): + """Compute opposite model from cached_model_name and run retranscribe.""" + opposite = "Large" if cached_model_name == "Base" else "Base" + return retranscribe_audio( + cached_intervals, cached_audio, cached_sample_rate, + cached_speech_intervals, cached_is_complete, + opposite, device, + cached_log_row=cached_log_row, + min_silence_ms=min_silence_ms, min_speech_ms=min_speech_ms, pad_ms=pad_ms, + request=request, + progress=progress, + ) + + +def process_audio_json(audio_data, min_silence_ms, min_speech_ms, pad_ms, model_name="Base", device="GPU"): + """API-only endpoint that returns just JSON (no HTML).""" + result = 
process_audio(audio_data, min_silence_ms, min_speech_ms, pad_ms, model_name, device) + return result[1] # json_output is at index 1 + + +def save_json_export(json_data): + """Save JSON results to a temp file for download.""" + import tempfile + import json + + if not json_data or not json_data.get("segments"): + return None + + # Create temp file with JSON + with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False, encoding='utf-8') as f: + json.dump(json_data, f, indent=2, ensure_ascii=False) + return f.name + + +def _mfa_upload_and_submit(refs, audio_paths): + """Upload audio files and submit alignment batch to the MFA Space. + + Returns (event_id, headers, base_url) so the caller can yield a progress + update before blocking on the SSE result stream. + """ + import requests + + hf_token = os.environ.get("HF_TOKEN", "") + headers = {} + if hf_token: + headers["Authorization"] = f"Bearer {hf_token}" + print(f"[MFA_TS] HF_TOKEN={'set' if hf_token else 'NOT SET'}") + + base = MFA_SPACE_URL + print(f"[MFA_TS] MFA base URL: {base}") + + # Upload all audio files in a single batched request + files_payload = [] + open_handles = [] + for path in audio_paths: + fh = open(path, "rb") + open_handles.append(fh) + files_payload.append(("files", (os.path.basename(path), fh, "audio/wav"))) + try: + resp = requests.post( + f"{base}/gradio_api/upload", + headers=headers, + files=files_payload, + timeout=MFA_TIMEOUT, + ) + resp.raise_for_status() + uploaded_paths = resp.json() + finally: + for fh in open_handles: + fh.close() + + # Build FileData objects + file_data_list = [ + {"path": p, "meta": {"_type": "gradio.FileData"}} + for p in uploaded_paths + ] + + # Submit batch alignment + submit_resp = requests.post( + f"{base}/gradio_api/call/align_batch", + headers={**headers, "Content-Type": "application/json"}, + json={"data": [refs, file_data_list]}, + timeout=MFA_TIMEOUT, + ) + submit_resp.raise_for_status() + event_id = submit_resp.json()["event_id"] + 
print(f"[MFA_TS] Submitted batch, event_id={event_id}") + + return event_id, headers, base + + +def _mfa_wait_result(event_id, headers, base): + """Wait for the MFA SSE stream and return parsed results list.""" + import requests + import json + + sse_resp = requests.get( + f"{base}/gradio_api/call/align_batch/{event_id}", + headers=headers, + stream=True, + timeout=MFA_TIMEOUT, + ) + sse_resp.raise_for_status() + + result_data = None + for line in sse_resp.iter_lines(decode_unicode=True): + if line and line.startswith("data: "): + result_data = line[6:] # strip "data: " prefix + + if result_data is None: + raise RuntimeError("No data received from MFA align_batch SSE stream") + + parsed = json.loads(result_data) + # Gradio wraps the return value in a list + if isinstance(parsed, list) and len(parsed) == 1: + parsed = parsed[0] + + if parsed.get("status") != "ok": + raise RuntimeError(f"MFA align_batch failed: {parsed.get('error', parsed)}") + + return parsed["results"] + + +def _ts_progress_bar_html(total_segments, rate, animated=True): + """Return HTML for a progress bar showing Segment x/N. + + When *animated* is False the bar is static at 0 %. 
When True the CSS fill + animation runs and an img-onerror trick drives the text counter (since + Gradio innerHTML doesn't execute + """ + + js = js.replace('__SURAH_LIGATURES_JSON__', json.dumps(_SURAH_LIGATURES)) + + with gr.Blocks(title="Quran Multi-Aligner", css=css, head=js, delete_cache=(DELETE_CACHE_FREQUENCY, DELETE_CACHE_AGE)) as app: + gr.Markdown("# 🎙️ Quran Multi-Aligner") + gr.Markdown(""" +- Transcribe and split any recitation by pauses within 1-2 minutes +- Get precise pause-, verse-, word- and character-level timestamps, exportable as JSON +- GPU-powered API usage with daily quotas +- Reliable confidence system to flag uncertain segments and missed words — no silent errors +- Robust tolerance to noise, speaker variation and suboptimal audio quality, particularly with the large model +- Not intended for incorrect or fragmented recitations; most suited for correct, continuous recitations (repetitions handled) +- [Feedback and contributions are welcome](https://huggingface.co/spaces/hetchyy/Quran-aligner/discussions) +""") + + # API Documentation accordion + with gr.Accordion("📡 API Usage", open=False): + gr.Markdown("In progress") + + with gr.Row(elem_id="main-row"): + # Left column: Input + with gr.Column(scale=LEFT_COLUMN_SCALE, elem_id="left-col"): + audio_input = gr.Audio( + label="Upload Recitation", + sources=["upload", "microphone"], + type="numpy" + ) + + # Example audio files — short surahs use CPU, long ones use GPU + with gr.Row(): + btn_ex_112 = gr.Button("112", size="sm", min_width=0) + btn_ex_84 = gr.Button("84", size="sm", min_width=0) + btn_ex_7 = gr.Button("7", size="sm", min_width=0) + btn_ex_juz30 = gr.Button("Juz' 30", size="sm", min_width=0) + + with gr.Accordion("Animation Settings", open=False, elem_id="anim-settings-accordion"): + with gr.Row(elem_id="anim-style-row"): + anim_granularity_radio = gr.Radio( + choices=ANIM_GRANULARITIES, + value=ANIM_GRANULARITY_DEFAULT, + label="Granularity", + scale=ANIM_STYLE_ROW_SCALES[0], + ) 
+ anim_mode_radio = gr.Radio( + choices=ANIM_DISPLAY_MODES, + value=ANIM_DISPLAY_MODE_DEFAULT, + label="Animation Style", + scale=ANIM_STYLE_ROW_SCALES[1], + ) + anim_verse_checkbox = gr.Checkbox( + value=False, + label="Verse Only", + elem_id="anim-verse-mode", + scale=ANIM_STYLE_ROW_SCALES[2], min_width=90, + ) + anim_color_picker = gr.ColorPicker( + value=ANIM_WORD_COLOR, + label="Color", + scale=ANIM_STYLE_ROW_SCALES[3], + ) + _is_custom = (ANIM_DISPLAY_MODE_DEFAULT == "Custom") + _preset = ANIM_PRESETS.get(ANIM_DISPLAY_MODE_DEFAULT, {}) + with gr.Row(): + anim_opacity_prev_slider = gr.Slider( + minimum=0, maximum=1, step=ANIM_OPACITY_STEP, + value=_preset.get("prev_opacity", ANIM_OPACITY_PREV_DEFAULT), + label="Before Opacity", + interactive=_is_custom, + elem_id="anim-opacity-prev", + ) + anim_opacity_after_slider = gr.Slider( + minimum=0, maximum=1, step=ANIM_OPACITY_STEP, + value=_preset.get("after_opacity", ANIM_OPACITY_AFTER_DEFAULT), + label="After Opacity", + interactive=_is_custom, + elem_id="anim-opacity-after", + ) + with gr.Row(): + anim_window_prev_slider = gr.Slider( + minimum=ANIM_WINDOW_PREV_MIN, maximum=ANIM_WINDOW_PREV_MAX, step=1, + value=_preset.get("prev_words", ANIM_WINDOW_PREV_DEFAULT), + label="Before Words", elem_id="anim-window-prev", + interactive=_is_custom, + ) + anim_window_after_slider = gr.Slider( + minimum=ANIM_WINDOW_AFTER_MIN, maximum=ANIM_WINDOW_AFTER_MAX, step=1, + value=_preset.get("after_words", ANIM_WINDOW_AFTER_DEFAULT), + label="After Words", elem_id="anim-window-after", + interactive=_is_custom, + ) + with gr.Row(elem_id="mega-styling-row"): + anim_word_spacing_slider = gr.Slider( + minimum=MEGA_WORD_SPACING_MIN, maximum=MEGA_WORD_SPACING_MAX, + step=MEGA_WORD_SPACING_STEP, value=MEGA_WORD_SPACING_DEFAULT, + label="Word Spacing", elem_id="anim-word-spacing", + ) + anim_text_size_slider = gr.Slider( + minimum=MEGA_TEXT_SIZE_MIN, maximum=MEGA_TEXT_SIZE_MAX, + step=MEGA_TEXT_SIZE_STEP, value=MEGA_TEXT_SIZE_DEFAULT, + 
label="Text Size", elem_id="anim-text-size", + ) + anim_line_spacing_slider = gr.Slider( + minimum=MEGA_LINE_SPACING_MIN, maximum=MEGA_LINE_SPACING_MAX, + step=MEGA_LINE_SPACING_STEP, value=MEGA_LINE_SPACING_DEFAULT, + label="Line Spacing", elem_id="anim-line-spacing", + ) + anim_cached_settings = gr.JSON(value=None, visible=False) + with gr.Accordion("Model Settings", open=True): + with gr.Row(): + model_radio = gr.Radio( + choices=["Base", "Large"], + value="Base", + label="ASR Model", + info="Large: more robust to noisy/non-studio recitations but much slower (10x bigger)" + ) + device_radio = gr.Radio( + choices=["GPU", "CPU"], + value="GPU", + label="Device", + info="Daily GPU usage limits. Unlimitted CPU usage but slower" + ) + + # Helper to create segmentation settings (preset buttons + sliders) + def _create_segmentation_settings(id_suffix=""): + """Create preset buttons and sliders. Returns (silence, speech, pad, btn_muj, btn_mur, btn_fast).""" + _default_silence, _default_speech, _default_pad = PRESET_MURATTAL + with gr.Row(): + with gr.Column(scale=1, min_width=0): + btn_muj = gr.Button("Mujawwad (Slow)", size="sm", variant="secondary", + elem_id=f"preset-mujawwad{id_suffix}") + with gr.Column(scale=1, min_width=0): + btn_mur = gr.Button("Murattal (Normal)", size="sm", variant="primary", + elem_id=f"preset-murattal{id_suffix}") + with gr.Column(scale=1, min_width=0): + btn_fast = gr.Button("Hadr (Fast)", size="sm", variant="secondary", + elem_id=f"preset-fast{id_suffix}") + + silence = gr.Slider( + minimum=MIN_SILENCE_MIN, maximum=MIN_SILENCE_MAX, + value=_default_silence, step=MIN_SILENCE_STEP, + label="Min Silence Duration (ms)", + info="Shorter = more segments. Decrease for reciters who have short pauses" + ) + speech = gr.Slider( + minimum=MIN_SPEECH_MIN, maximum=MIN_SPEECH_MAX, + value=_default_speech, step=MIN_SPEECH_STEP, + label="Min Speech Duration (ms)", + info="Speech segments shorter than this are discarded. 
Increase to filter out false detections" + ) + pad = gr.Slider( + minimum=PAD_MIN, maximum=PAD_MAX, + value=_default_pad, step=PAD_STEP, + label="Padding (ms)", + info="Extra audio kept before/after each segment to avoid clipping speech edges" + ) + return silence, speech, pad, btn_muj, btn_mur, btn_fast + + def _wire_presets(btn_muj, btn_mur, btn_fast, silence, speech, pad): + """Wire preset button click handlers to sliders.""" + presets = { + "mujawwad": PRESET_MUJAWWAD, + "murattal": PRESET_MURATTAL, + "fast": PRESET_FAST, + } + + def apply_preset(name): + s, sp, p = presets[name] + return ( + s, sp, p, + gr.update(variant="primary" if name == "mujawwad" else "secondary"), + gr.update(variant="primary" if name == "murattal" else "secondary"), + gr.update(variant="primary" if name == "fast" else "secondary"), + ) + + outputs = [silence, speech, pad, btn_muj, btn_mur, btn_fast] + btn_muj.click(fn=lambda: apply_preset("mujawwad"), inputs=[], outputs=outputs, api_name=False) + btn_mur.click(fn=lambda: apply_preset("murattal"), inputs=[], outputs=outputs, api_name=False) + btn_fast.click(fn=lambda: apply_preset("fast"), inputs=[], outputs=outputs, api_name=False) + + with gr.Accordion("Segmentation Settings", open=True): + min_silence_slider, min_speech_slider, pad_slider, \ + preset_mujawwad, preset_murattal, preset_fast = _create_segmentation_settings() + + _wire_presets(preset_mujawwad, preset_murattal, preset_fast, + min_silence_slider, min_speech_slider, pad_slider) + + # JSON download appears here after extraction + export_file = gr.File(label="📥 Download JSON", visible=True, interactive=False) + + # Right column: Output + with gr.Column(scale=RIGHT_COLUMN_SCALE): + extract_btn = gr.Button("Extract Segments", variant="primary", size="lg") + with gr.Row(elem_id="action-btns-row"): + resegment_toggle_btn = gr.Button( + "Resegment with New Settings", variant="primary", size="lg", visible=False + ) + retranscribe_btn = gr.Button( + "Retranscribe with Large Model", 
variant="primary", size="lg", visible=False
                    )
                with gr.Row(elem_id="ts-row"):
                    compute_ts_btn = gr.Button(
                        "Compute Timestamps", variant="secondary", size="lg", interactive=False, visible=False
                    )
                    compute_ts_progress = gr.HTML(value="", visible=False)
                animate_all_html = gr.HTML(value="", visible=False)

                # Collapsible panel carrying a second copy of the segmentation sliders,
                # used to re-run segmentation from cached VAD data (no heavy recompute).
                with gr.Column(visible=False) as resegment_panel:
                    gr.Markdown(
                        "Uses cached data, skipping the heavy computation, "
                        "so it's much faster. Useful if results are over-segmented "
                        "or under-segmented"
                    )
                    rs_silence, rs_speech, rs_pad, \
                    rs_btn_muj, rs_btn_mur, rs_btn_fast = _create_segmentation_settings(id_suffix="-rs")
                    resegment_btn = gr.Button("Resegment", variant="primary", size="lg")

                    _wire_presets(rs_btn_muj, rs_btn_mur, rs_btn_fast,
                                  rs_silence, rs_speech, rs_pad)

                # NOTE(review): this placeholder literal appears to have contained HTML
                # markup that was stripped during extraction — confirm against the
                # original source before shipping.
                output_html = gr.HTML(
                    value='Upload audio and click "Extract Segments" to begin',
                    elem_classes=["output-html"]
                )
                # Hidden JSON output for API consumers
                output_json = gr.JSON(visible=False, label="JSON Output")

        # State components for caching VAD data between runs
        cached_speech_intervals = gr.State(value=None)
        cached_is_complete = gr.State(value=None)
        cached_audio = gr.State(value=None)
        cached_sample_rate = gr.State(value=None)
        cached_intervals = gr.State(value=None)  # cleaned (start,end) list from last run
        cached_model_name = gr.State(value=None)  # model used in last run (for retranscribe label)
        cached_segment_dir = gr.State(value=None)  # segment audio dir from last run (for MFA timestamps)
        cached_log_row = gr.State(value=None)  # usage log row dict (mutated in-place before push)

        # Event handlers
        # D. Clear everything when new audio is uploaded/recorded
        # NOTE(review): this literal likely contained stripped HTML markup too — confirm.
        _empty_placeholder = 'Upload audio and click "Extract Segments" to begin'
        # The lambda returns 11 data values (placeholder + 10 Nones) followed by
        # 7 visibility updates — 18 values matching the 18 outputs below.
        audio_input.change(
            fn=lambda: (
                _empty_placeholder, None, None,
                None, None, None, None, None, None, None, None,
                gr.update(visible=True),  # show extract_btn
                gr.update(visible=False, interactive=False, variant="secondary"),  # hide+reset compute_ts_btn
                gr.update(visible=False),  # hide compute_ts_progress
                gr.update(visible=False),  # hide animate_all_html
                gr.update(visible=False),  # hide resegment_toggle_btn
                gr.update(visible=False),  # hide retranscribe_btn
                gr.update(visible=False),  # hide resegment_panel
            ),
            inputs=[],
            outputs=[
                output_html, output_json, export_file,
                cached_speech_intervals, cached_is_complete, cached_audio, cached_sample_rate,
                cached_intervals, cached_model_name, cached_segment_dir, cached_log_row,
                extract_btn, compute_ts_btn, compute_ts_progress, animate_all_html,
                resegment_toggle_btn, retranscribe_btn, resegment_panel,
            ],
            api_name=False, show_progress="hidden"
        )

        # Example recitation buttons — load a bundled recording and force GPU mode.
        btn_ex_112.click(fn=lambda: ("data/112.mp3", "GPU"), inputs=[], outputs=[audio_input, device_radio], api_name=False)
        btn_ex_84.click(fn=lambda: ("data/84.mp3", "GPU"), inputs=[], outputs=[audio_input, device_radio], api_name=False)
        btn_ex_7.click(fn=lambda: ("data/7.mp3", "GPU"), inputs=[], outputs=[audio_input, device_radio], api_name=False)
        btn_ex_juz30.click(fn=lambda: ("data/Juz' 30.mp3", "GPU"), inputs=[], outputs=[audio_input, device_radio], api_name=False)

        # A. 
Extract button click chain
        # Three-step chain: heavy processing → refresh JSON download → swap
        # button visibility/labels and sync slider values into the resegment panel.
        extract_btn.click(
            fn=process_audio,
            inputs=[
                audio_input,
                min_silence_slider,
                min_speech_slider,
                pad_slider,
                model_radio,
                device_radio
            ],
            outputs=[
                output_html, output_json,
                cached_speech_intervals, cached_is_complete,
                cached_audio, cached_sample_rate,
                cached_intervals, cached_segment_dir,
                cached_log_row,
            ],
            api_name=False, show_progress="minimal"
        ).then(
            fn=save_json_export,
            inputs=[output_json],
            outputs=[export_file],
            show_progress="hidden"
        ).then(
            fn=lambda silence, speech, pad, model: (
                gr.update(visible=False),  # hide extract_btn
                gr.update(visible=True, interactive=True, variant="primary"),  # show+enable compute_ts_btn
                gr.update(visible=True),  # show resegment_toggle_btn
                gr.update(  # show retranscribe_btn with opposite model label
                    visible=True,
                    value=f"Retranscribe with {'Large' if model == 'Base' else 'Base'} Model"
                ),
                silence, speech, pad,  # sync slider values to resegment panel
                model,  # store in cached_model_name
            ),
            inputs=[min_silence_slider, min_speech_slider, pad_slider, model_radio],
            outputs=[extract_btn, compute_ts_btn, resegment_toggle_btn, retranscribe_btn,
                     rs_silence, rs_speech, rs_pad, cached_model_name],
            api_name=False, show_progress="hidden"
        )

        # A2. Compute Timestamps — MFA forced alignment adds data-start/data-end to word spans
        compute_ts_btn.click(
            fn=compute_mfa_timestamps,
            inputs=[output_html, output_json, cached_segment_dir, cached_log_row],
            outputs=[output_html, compute_ts_btn, animate_all_html, compute_ts_progress, output_json],
            api_name=False, show_progress="hidden"
        ).then(
            fn=save_json_export,
            inputs=[output_json],
            outputs=[export_file],
            show_progress="hidden"
        )

        # B. 
Toggle resegment panel visibility
        # Server-side state mirrors the panel's visibility so the toggle button
        # can flip it without inspecting the DOM.
        _resegment_panel_visible = gr.State(value=False)

        def _toggle_resegment_panel(currently_visible):
            """Flip the resegment panel; returns (visibility update, new state)."""
            new_visible = not currently_visible
            return gr.update(visible=new_visible), new_visible

        resegment_toggle_btn.click(
            fn=_toggle_resegment_panel,
            inputs=[_resegment_panel_visible],
            outputs=[resegment_panel, _resegment_panel_visible],
            api_name=False, show_progress="hidden"
        )

        # C. Resegment button click chain
        # Re-runs segmentation from cached VAD data, then collapses the panel,
        # refreshes the JSON export, and syncs sliders/buttons back to the main panel.
        resegment_btn.click(
            fn=resegment_audio,
            inputs=[
                cached_speech_intervals, cached_is_complete,
                cached_audio, cached_sample_rate,
                rs_silence, rs_speech, rs_pad,
                model_radio, device_radio,
                cached_log_row,
            ],
            outputs=[
                output_html, output_json,
                cached_speech_intervals, cached_is_complete,
                cached_audio, cached_sample_rate,
                cached_intervals, cached_segment_dir,
                cached_log_row,
            ],
            api_name=False, show_progress="minimal"
        ).then(
            fn=lambda: (gr.update(visible=False), False),
            inputs=[],
            outputs=[resegment_panel, _resegment_panel_visible],
            api_name=False, show_progress="hidden"
        ).then(
            fn=save_json_export,
            inputs=[output_json],
            outputs=[export_file],
            show_progress="hidden"
        ).then(
            fn=lambda silence, speech, pad, model: (
                silence, speech, pad,  # sync sliders back to main panel
                model,  # update cached_model_name to model_radio
                gr.update(visible=True, interactive=True, variant="primary"),  # show+re-enable compute_ts_btn
                gr.update(visible=False),  # hide animate_all_html (new segments, no timestamps)
                gr.update(  # re-show retranscribe with opposite label
                    visible=True,
                    value=f"Retranscribe with {'Large' if model == 'Base' else 'Base'} Model"
                ),
            ),
            inputs=[rs_silence, rs_speech, rs_pad, model_radio],
            outputs=[min_silence_slider, min_speech_slider, pad_slider,
                     cached_model_name, compute_ts_btn, animate_all_html, retranscribe_btn],
            api_name=False, show_progress="hidden"
        )

        # D. 
Retranscribe button click chain
        # Re-runs ASR on the cached segment intervals with the other model size,
        # then refreshes the JSON export and flips cached_model_name.
        retranscribe_btn.click(
            fn=_retranscribe_wrapper,
            inputs=[
                cached_intervals, cached_audio, cached_sample_rate,
                cached_speech_intervals, cached_is_complete,
                cached_model_name, device_radio,
                cached_log_row,
                min_silence_slider, min_speech_slider, pad_slider,
            ],
            outputs=[
                output_html, output_json,
                cached_speech_intervals, cached_is_complete,
                cached_audio, cached_sample_rate,
                cached_intervals, cached_segment_dir,
                cached_log_row,
            ],
            api_name=False, show_progress="minimal"
        ).then(
            fn=save_json_export,
            inputs=[output_json],
            outputs=[export_file],
            show_progress="hidden"
        ).then(
            fn=lambda model_name: (
                gr.update(visible=False),  # hide retranscribe_btn
                gr.update(visible=True, interactive=True, variant="primary"),  # show+re-enable compute_ts_btn
                gr.update(visible=False),  # hide animate_all_html (new segments, no timestamps)
                "Large" if model_name == "Base" else "Base",  # update cached_model_name to opposite
            ),
            inputs=[cached_model_name],
            outputs=[retranscribe_btn, compute_ts_btn, animate_all_html, cached_model_name],
            api_name=False, show_progress="hidden"
        )

        # E. Animation granularity change handler — update JS global (client-side only)
        # fn=None: no server round-trip; the js hook mutates window globals and
        # toggles the `anim-chars` class on active cards.
        anim_granularity_radio.change(
            fn=None,
            inputs=[anim_granularity_radio],
            outputs=[],
            api_name=False, show_progress="hidden",
            js="""(g) => {
                window.ANIM_GRANULARITY = g;
                document.querySelectorAll('.segment-card').forEach(card => {
                    if (card.querySelector('.animate-btn.active')) {
                        if (g === 'Characters') {
                            card.classList.add('anim-chars');
                        } else {
                            card.classList.remove('anim-chars');
                        }
                    }
                });
                // Also update mega card if Animate All is active
                var mega = document.querySelector('.mega-card');
                if (mega) {
                    if (g === 'Characters') {
                        mega.classList.add('anim-chars');
                    } else {
                        mega.classList.remove('anim-chars');
                    }
                }
                // Update slider labels based on granularity
                var unit = g === 'Characters' ? 
'Characters' : 'Words';
                var prevEl = document.getElementById('anim-window-prev');
                if (prevEl) {
                    var lbl = prevEl.querySelector('label span, label');
                    if (lbl) lbl.textContent = 'Previous ' + unit;
                }
                var afterEl = document.getElementById('anim-window-after');
                if (afterEl) {
                    var lbl = afterEl.querySelector('label span, label');
                    if (lbl) lbl.textContent = 'After ' + unit;
                }
                saveAnimSettings();
            }"""
        )

        # F. Animation display mode change handler — apply preset values + toggle slider interactivity
        def _on_mode_change(mode, verse_on, op_prev, op_after, w_prev, w_after):
            """Sync slider values/interactivity when the display mode changes.

            The js= hook below runs in the browser first and returns (possibly
            preset-substituted) values that Gradio then passes to this function.
            Sliders stay editable only in Custom mode; the two window sliders are
            additionally locked while verse mode is on.
            """
            preset = ANIM_PRESETS.get(mode)
            is_custom = not preset
            return (
                gr.update(value=op_prev, interactive=is_custom),
                gr.update(value=op_after, interactive=is_custom),
                gr.update(value=w_prev, interactive=is_custom and not verse_on),
                gr.update(value=w_after, interactive=is_custom and not verse_on),
            )

        anim_mode_radio.change(
            fn=_on_mode_change,
            inputs=[anim_mode_radio, anim_verse_checkbox,
                    anim_opacity_prev_slider, anim_opacity_after_slider,
                    anim_window_prev_slider, anim_window_after_slider],
            outputs=[anim_opacity_prev_slider, anim_opacity_after_slider, anim_window_prev_slider, anim_window_after_slider],
            api_name=False, show_progress="hidden",
            js="""(mode, verseOn, opPrev, opAfter, wPrev, wAfter) => {
                // Save current Custom values before switching away
                var prevMode = window.ANIM_DISPLAY_MODE;
                if (prevMode === 'Custom') {
                    saveAnimSettings();
                }
                window.ANIM_DISPLAY_MODE = mode;
                var preset = window.ANIM_PRESETS[mode];
                if (preset) {
                    window.ANIM_OPACITY_PREV = preset.prev_opacity;
                    window.ANIM_OPACITY_AFTER = preset.after_opacity;
                    window.ANIM_WINDOW_PREV = preset.prev_words;
                    window.ANIM_WINDOW_AFTER = preset.after_words;
                    opPrev = preset.prev_opacity;
                    opAfter = preset.after_opacity;
                    wPrev = preset.prev_words;
                    wAfter = preset.after_words;
                } else {
                    // Entering Custom: restore saved Custom values from localStorage
                    var s = 
loadAnimSettings();
                    if (s && s.custom) {
                        window.ANIM_OPACITY_PREV = s.custom.prevOpacity;
                        window.ANIM_OPACITY_AFTER = s.custom.afterOpacity;
                        window.ANIM_WINDOW_PREV = s.custom.prevWords;
                        window.ANIM_WINDOW_AFTER = s.custom.afterWords;
                        opPrev = s.custom.prevOpacity;
                        opAfter = s.custom.afterOpacity;
                        wPrev = s.custom.prevWords;
                        wAfter = s.custom.afterWords;
                    }
                }
                rebuildWindowGradient();
                reapplyWindowNow();
                updateWindowMaxLabel('anim-window-prev', window.ANIM_WINDOW_PREV, window.ANIM_WINDOW_PREV_MAX);
                updateWindowMaxLabel('anim-window-after', window.ANIM_WINDOW_AFTER, window.ANIM_WINDOW_AFTER_MAX);
                saveAnimSettings();
                return [mode, verseOn, opPrev, opAfter, wPrev, wAfter];
            }"""
        )

        # G. Before/After opacity slider change handlers
        # Client-side only (fn=None): mutate window globals and re-render the gradient.
        anim_opacity_prev_slider.change(
            fn=None,
            inputs=[anim_opacity_prev_slider],
            outputs=[],
            api_name=False, show_progress="hidden",
            js="(val) => { window.ANIM_OPACITY_PREV = val; rebuildWindowGradient(); reapplyWindowNow(); window._windowSettingsVersion++; saveAnimSettings(); }"
        )
        anim_opacity_after_slider.change(
            fn=None,
            inputs=[anim_opacity_after_slider],
            outputs=[],
            api_name=False, show_progress="hidden",
            js="(val) => { window.ANIM_OPACITY_AFTER = val; rebuildWindowGradient(); reapplyWindowNow(); window._windowSettingsVersion++; saveAnimSettings(); }"
        )

        # G2. 
Prev/After word count slider change handlers
        # Client-side only: update the window size globals and refresh the max labels.
        anim_window_prev_slider.change(
            fn=None,
            inputs=[anim_window_prev_slider],
            outputs=[],
            api_name=False, show_progress="hidden",
            js="""(val) => {
                window.ANIM_WINDOW_PREV = val;
                rebuildWindowGradient(); reapplyWindowNow();
                updateWindowMaxLabel('anim-window-prev', val, window.ANIM_WINDOW_PREV_MAX);
                window._windowSettingsVersion++;
                saveAnimSettings();
            }"""
        )
        anim_window_after_slider.change(
            fn=None,
            inputs=[anim_window_after_slider],
            outputs=[],
            api_name=False, show_progress="hidden",
            js="""(val) => {
                window.ANIM_WINDOW_AFTER = val;
                rebuildWindowGradient(); reapplyWindowNow();
                updateWindowMaxLabel('anim-window-after', val, window.ANIM_WINDOW_AFTER_MAX);
                window._windowSettingsVersion++;
                saveAnimSettings();
            }"""
        )

        # G3. Verse checkbox change handler
        def _on_verse_toggle(verse_on, mode):
            """Lock the window sliders while verse mode is on (Custom mode only)."""
            if mode != "Custom":
                # Presets already control interactivity; leave sliders untouched.
                return gr.update(), gr.update()
            return (
                gr.update(interactive=not verse_on),
                gr.update(interactive=not verse_on),
            )

        anim_verse_checkbox.change(
            fn=_on_verse_toggle,
            inputs=[anim_verse_checkbox, anim_mode_radio],
            outputs=[anim_window_prev_slider, anim_window_after_slider],
            api_name=False, show_progress="hidden",
            js="""(val, mode) => {
                window.ANIM_VERSE_MODE = val;
                reapplyWindowNow();
                window._windowSettingsVersion++;
                saveAnimSettings();
                return [val, mode];
            }"""
        )

        # H. Word spacing slider change handler
        anim_word_spacing_slider.change(
            fn=None, inputs=[anim_word_spacing_slider], outputs=[],
            api_name=False, show_progress="hidden",
            js="(val) => { var m=document.querySelector('.mega-card'); if(m) m.style.wordSpacing=val+'em'; saveAnimSettings(); }"
        )

        # J. 
Text size slider change handler
        anim_text_size_slider.change(
            fn=None, inputs=[anim_text_size_slider], outputs=[],
            api_name=False, show_progress="hidden",
            js="(val) => { var m=document.querySelector('.mega-card'); if(m) m.style.fontSize=val+'px'; saveAnimSettings(); }"
        )

        # K. Line spacing slider change handler
        anim_line_spacing_slider.change(
            fn=None, inputs=[anim_line_spacing_slider], outputs=[],
            api_name=False, show_progress="hidden",
            js="(val) => { var m=document.querySelector('.mega-card'); if(m) m.style.lineHeight=val; saveAnimSettings(); }"
        )

        # L. Active color picker change handler — update CSS variable (client-side only)
        anim_color_picker.change(
            fn=None,
            inputs=[anim_color_picker],
            outputs=[],
            api_name=False, show_progress="hidden",
            js="(val) => { document.documentElement.style.setProperty('--anim-word-color', val); saveAnimSettings(); }"
        )

        # M. Restore animation settings from localStorage on page load
        def _restore_anim_settings(cached):
            """Restore animation settings from localStorage via hidden JSON bridge.

            `cached` is the settings object returned by the page-load js hook
            (None/empty on a first visit). Returns exactly 11 gr.update values,
            one per component in app.load's outputs list below, resolving
            opacity/window values from the active preset, the saved Custom
            values, or the config defaults — in that order.
            """
            if not cached:
                return (gr.update(),) * 11  # No saved settings — keep defaults
            mode = cached.get("mode", ANIM_DISPLAY_MODE_DEFAULT)
            preset = ANIM_PRESETS.get(mode)
            is_custom = not preset
            verse_on = bool(cached.get("verseOnly", False))
            if preset:
                op_prev = preset["prev_opacity"]
                op_after = preset["after_opacity"]
                w_prev = preset["prev_words"]
                w_after = preset["after_words"]
            elif cached.get("custom"):
                c = cached["custom"]
                op_prev = c.get("prevOpacity", ANIM_OPACITY_PREV_DEFAULT)
                op_after = c.get("afterOpacity", ANIM_OPACITY_AFTER_DEFAULT)
                w_prev = c.get("prevWords", ANIM_WINDOW_PREV_DEFAULT)
                w_after = c.get("afterWords", ANIM_WINDOW_AFTER_DEFAULT)
            else:
                op_prev = ANIM_OPACITY_PREV_DEFAULT
                op_after = ANIM_OPACITY_AFTER_DEFAULT
                w_prev = ANIM_WINDOW_PREV_DEFAULT
                w_after = ANIM_WINDOW_AFTER_DEFAULT
            return (
                gr.update(value=cached.get("granularity", ANIM_GRANULARITY_DEFAULT)),
                gr.update(value=mode),
                gr.update(value=verse_on),
                gr.update(value=cached.get("color", ANIM_WORD_COLOR)),
                gr.update(value=op_prev, interactive=is_custom),
                gr.update(value=op_after, interactive=is_custom),
                gr.update(value=w_prev, interactive=is_custom and not verse_on),
                gr.update(value=w_after, interactive=is_custom and not verse_on),
                gr.update(value=cached.get("wordSpacing", MEGA_WORD_SPACING_DEFAULT)),
                gr.update(value=cached.get("textSize", MEGA_TEXT_SIZE_DEFAULT)),
                gr.update(value=cached.get("lineSpacing", MEGA_LINE_SPACING_DEFAULT)),
            )

        app.load(
            fn=_restore_anim_settings,
            inputs=[anim_cached_settings],
            outputs=[
                anim_granularity_radio, anim_mode_radio, anim_verse_checkbox,
                anim_color_picker,
                anim_opacity_prev_slider, anim_opacity_after_slider,
                anim_window_prev_slider, anim_window_after_slider,
                anim_word_spacing_slider, anim_text_size_slider, anim_line_spacing_slider,
            ],
            show_progress="hidden",
            js="""(ignored) => {
                var s = loadAnimSettings();
                if (s && s.color) document.documentElement.style.setProperty('--anim-word-color', s.color);
                // Update window max labels and slider labels after Gradio renders
                if (s) setTimeout(function() {
                    updateWindowMaxLabel('anim-window-prev', window.ANIM_WINDOW_PREV, window.ANIM_WINDOW_PREV_MAX);
                    updateWindowMaxLabel('anim-window-after', window.ANIM_WINDOW_AFTER, window.ANIM_WINDOW_AFTER_MAX);
                    if (s.granularity === 'Characters') {
                        var prevEl = document.getElementById('anim-window-prev');
                        if (prevEl) { var lbl = prevEl.querySelector('label span, label'); if (lbl) lbl.textContent = 'Previous Characters'; }
                        var afterEl = document.getElementById('anim-window-after');
                        if (afterEl) { var lbl = afterEl.querySelector('label span, label'); if (lbl) lbl.textContent = 'After Characters'; }
                    }
                }, 200);
                return s;
            }"""
        )

        # Hidden API-only endpoint for JSON output
        gr.Button(visible=False).click(
            fn=process_audio_json,
inputs=[audio_input, min_silence_slider, min_speech_slider, pad_slider, model_radio, device_radio], + outputs=[output_json], + api_name="process_audio_json" + ) + + + return app + + +# ============================================================================= +# Module-level demo for Gradio hot-reload (`gradio app.py`) +# ============================================================================= +demo = build_interface() + +# ============================================================================= +# Main +# ============================================================================= + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--share", action="store_true", help="Create public link") + parser.add_argument("--port", type=int, default=PORT, help="Port to run on") + args = parser.parse_args() + + port = 7860 + + print(f"ZeroGPU available: {ZERO_GPU_AVAILABLE}") + print(f"Launching Gradio on port {port}") + + # Preload models and caches at startup so first request is fast + from src.segment_processor import load_segmenter + from src.alignment.phoneme_asr import load_phoneme_asr + from src.alignment.ngram_index import get_ngram_index + from src.alignment.phoneme_matcher_cache import preload_all_chapters + print("Preloading models...") + load_segmenter() + load_phoneme_asr("Base") + load_phoneme_asr("Large") + print("Models preloaded.") + print("Preloading caches...") + get_ngram_index() + preload_all_chapters() + print("Caches preloaded.") + + # Warm up soxr resampler so first request doesn't pay initialization cost + import librosa + _dummy = librosa.resample(np.zeros(1600, dtype=np.float32), orig_sr=44100, target_sr=16000, res_type=RESAMPLE_TYPE) + del _dummy + print("Resampler warmed up.") + + # AoT compilation for VAD model (requires GPU lease) + if IS_HF_SPACE and ZERO_GPU_AVAILABLE: + print("Running AoT compilation for VAD model...") + try: + aoti_result = test_aoti_compilation_gpu() + 
print(f"AoT compile result: {aoti_result}") + # Apply compiled model OUTSIDE GPU lease (critical for persistence) + if aoti_result.get("compiled"): + apply_aoti_compiled(aoti_result["compiled"]) + except Exception as e: + print(f"AoT compilation failed (non-fatal): {e}") + + demo.launch( + server_name="0.0.0.0", + server_port=port, + share=args.share, + allowed_paths=["/tmp"], + )