Spaces:
Running on Zero
Running on Zero
Add transition segment detection for non-Quranic phrases (Amin, Takbir, Tahmeed)
Detect and render Amin (after Al-Fatiha), Takbir (Allahu Akbar), and
Tahmeed (Sami'a Allahu liman hamidah) as special segment cards with
distinct styling. Includes Takbir-at-start detection before Isti'adha/
Basmala, transition mode state machine for consecutive non-Quranic
segments, Tahmeed peek-ahead merge for split sami'a+rabbana segments,
and unified special segment rendering with name badges.
Also simplify GPU error handling: replace per-error-type fallback with
immediate process restart for non-quota GPU errors.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- config.py +1 -11
- src/alignment/alignment_pipeline.py +119 -7
- src/alignment/special_segments.py +170 -33
- src/core/segment_types.py +2 -0
- src/core/zero_gpu.py +19 -256
- src/pipeline.py +21 -4
- src/ui/segments.py +17 -6
- src/ui/styles.py +4 -0
config.py
CHANGED
|
@@ -133,6 +133,7 @@ LOOKBACK_WORDS = 30 # Window words to look back from pointer for
|
|
| 133 |
LOOKAHEAD_WORDS = 10 # Window words to look ahead after expected end position
|
| 134 |
MAX_EDIT_DISTANCE = 0.25 # Max normalized edit distance for valid ayah match
|
| 135 |
MAX_SPECIAL_EDIT_DISTANCE = 0.35 # Max normalized edit distance for Basmala/Isti'adha detection
|
|
|
|
| 136 |
START_PRIOR_WEIGHT = 0.005 # Penalty per word away from expected position
|
| 137 |
|
| 138 |
# Failed Segments
|
|
@@ -234,17 +235,6 @@ PROGRESS_RETRANSCRIBE = {
|
|
| 234 |
|
| 235 |
MFA_PROGRESS_SEGMENT_RATE = 0.05 # seconds per segment for progress bar animation
|
| 236 |
|
| 237 |
-
# =============================================================================
|
| 238 |
-
# CUDA health monitoring (ZeroGPU poisoning protection)
|
| 239 |
-
# =============================================================================
|
| 240 |
-
|
| 241 |
-
CUDA_COOLDOWN_SECONDS = 30 # Seconds before retrying GPU after CUDA error
|
| 242 |
-
MAX_CUDA_FAILURES = 5 # Consecutive CUDA failures before process restart
|
| 243 |
-
|
| 244 |
-
# SDK worker scheduling errors (transient — different from CUDA hardware errors)
|
| 245 |
-
SDK_WORKER_COOLDOWN_SECONDS = 15 # Very short — scheduler issues resolve fast
|
| 246 |
-
SDK_WORKER_FAILURE_THRESHOLD = 2 # After 2 consecutive failed retries → cooldown
|
| 247 |
-
|
| 248 |
# =============================================================================
|
| 249 |
# UI settings
|
| 250 |
# =============================================================================
|
|
|
|
| 133 |
LOOKAHEAD_WORDS = 10 # Window words to look ahead after expected end position
|
| 134 |
MAX_EDIT_DISTANCE = 0.25 # Max normalized edit distance for valid ayah match
|
| 135 |
MAX_SPECIAL_EDIT_DISTANCE = 0.35 # Max normalized edit distance for Basmala/Isti'adha detection
|
| 136 |
+
MAX_TRANSITION_EDIT_DISTANCE = 0.35 # Max normalized edit distance for transition segments (Amin/Takbir/Tahmeed)
|
| 137 |
START_PRIOR_WEIGHT = 0.005 # Penalty per word away from expected position
|
| 138 |
|
| 139 |
# Failed Segments
|
|
|
|
| 235 |
|
| 236 |
MFA_PROGRESS_SEGMENT_RATE = 0.05 # seconds per segment for progress bar animation
|
| 237 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
# =============================================================================
|
| 239 |
# UI settings
|
| 240 |
# =============================================================================
|
src/alignment/alignment_pipeline.py
CHANGED
|
@@ -18,7 +18,7 @@ def run_phoneme_matching(
|
|
| 18 |
first_quran_idx: int = 0,
|
| 19 |
special_results: List[tuple] = None,
|
| 20 |
start_pointer: int = 0,
|
| 21 |
-
) -> Tuple[List[tuple], dict, set]:
|
| 22 |
"""
|
| 23 |
Phoneme-based segment matching using substring DP.
|
| 24 |
|
|
@@ -30,8 +30,9 @@ def run_phoneme_matching(
|
|
| 30 |
start_pointer: Initial word pointer from anchor voting
|
| 31 |
|
| 32 |
Returns:
|
| 33 |
-
(results, profiling_dict, gap_segments)
|
| 34 |
results: List[(matched_text, score, matched_ref), ...]
|
|
|
|
| 35 |
"""
|
| 36 |
from .phoneme_matcher import align_segment, get_matched_text
|
| 37 |
from .phoneme_matcher_cache import get_chapter_reference
|
|
@@ -62,7 +63,10 @@ def run_phoneme_matching(
|
|
| 62 |
result_build_total = 0.0
|
| 63 |
|
| 64 |
# Track whether the next segment might have Basmala fused with verse content
|
| 65 |
-
from .special_segments import
|
|
|
|
|
|
|
|
|
|
| 66 |
basmala_already_detected = any(
|
| 67 |
r[2] in ("Basmala", "Isti'adha+Basmala") for r in (special_results or [])
|
| 68 |
)
|
|
@@ -70,6 +74,12 @@ def run_phoneme_matching(
|
|
| 70 |
|
| 71 |
special_merges = 0
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
# Gap tracking (initialized here so inline chapter-transition checks can add entries)
|
| 74 |
gap_segments = set()
|
| 75 |
transition_expected_pointer = -1 # -1 = no pending check
|
|
@@ -111,9 +121,60 @@ def run_phoneme_matching(
|
|
| 111 |
skip_count -= 1
|
| 112 |
continue
|
| 113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
segment_idx = first_quran_idx + i + 1 # 1-indexed for display
|
| 115 |
segments_attempted += 1
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
alignment, timing = align_segment(asr_phonemes, chapter_ref, pointer, segment_idx)
|
| 118 |
num_segments += 1
|
| 119 |
|
|
@@ -125,8 +186,22 @@ def run_phoneme_matching(
|
|
| 125 |
|
| 126 |
# Chapter transition: pointer past end of chapter
|
| 127 |
if alignment is None and pointer >= chapter_ref.num_words:
|
| 128 |
-
from .special_segments import detect_inter_chapter_specials
|
| 129 |
remaining_phonemes = phoneme_texts[first_quran_idx + i:]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
inter_specials, num_consumed = detect_inter_chapter_specials(remaining_phonemes)
|
| 131 |
|
| 132 |
if chapter_ref.surah == 1:
|
|
@@ -134,8 +209,8 @@ def run_phoneme_matching(
|
|
| 134 |
print(f" [CHAPTER-END] Surah 1 complete at segment {segment_idx}, "
|
| 135 |
f"running global reanchor...")
|
| 136 |
|
| 137 |
-
# Use segments after specials for anchor voting
|
| 138 |
-
anchor_offset = first_quran_idx + i + num_consumed
|
| 139 |
anchor_remaining = phoneme_texts[anchor_offset:]
|
| 140 |
|
| 141 |
reanchor_surah, reanchor_ayah = find_anchor_by_voting(
|
|
@@ -171,6 +246,18 @@ def run_phoneme_matching(
|
|
| 171 |
detected_surah = next_surah
|
| 172 |
consecutive_failures = 0
|
| 173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
if num_consumed > 0:
|
| 175 |
has_basmala = any(s[2] in ("Basmala", "Isti'adha+Basmala") for s in inter_specials)
|
| 176 |
is_first_after_transition = not has_basmala
|
|
@@ -237,6 +324,29 @@ def run_phoneme_matching(
|
|
| 237 |
_check_transition_gap(alignment.start_word_idx)
|
| 238 |
segments_passed += 1
|
| 239 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
# === Graduated retry ===
|
| 241 |
# Tier 1: expanded window, same threshold
|
| 242 |
tier1_attempts += 1
|
|
@@ -381,6 +491,7 @@ def run_phoneme_matching(
|
|
| 381 |
"segments_attempted": segments_attempted,
|
| 382 |
"segments_passed": segments_passed,
|
| 383 |
"special_merges": special_merges,
|
|
|
|
| 384 |
}
|
| 385 |
else:
|
| 386 |
profiling = {
|
|
@@ -395,6 +506,7 @@ def run_phoneme_matching(
|
|
| 395 |
"segments_attempted": segments_attempted,
|
| 396 |
"segments_passed": segments_passed,
|
| 397 |
"special_merges": special_merges,
|
|
|
|
| 398 |
}
|
| 399 |
|
| 400 |
-
return results, profiling, gap_segments
|
|
|
|
| 18 |
first_quran_idx: int = 0,
|
| 19 |
special_results: List[tuple] = None,
|
| 20 |
start_pointer: int = 0,
|
| 21 |
+
) -> Tuple[List[tuple], dict, set, dict]:
|
| 22 |
"""
|
| 23 |
Phoneme-based segment matching using substring DP.
|
| 24 |
|
|
|
|
| 30 |
start_pointer: Initial word pointer from anchor voting
|
| 31 |
|
| 32 |
Returns:
|
| 33 |
+
(results, profiling_dict, gap_segments, merged_into)
|
| 34 |
results: List[(matched_text, score, matched_ref), ...]
|
| 35 |
+
merged_into: dict mapping consumed segment indices to their target segment index
|
| 36 |
"""
|
| 37 |
from .phoneme_matcher import align_segment, get_matched_text
|
| 38 |
from .phoneme_matcher_cache import get_chapter_reference
|
|
|
|
| 63 |
result_build_total = 0.0
|
| 64 |
|
| 65 |
# Track whether the next segment might have Basmala fused with verse content
|
| 66 |
+
from .special_segments import (
|
| 67 |
+
SPECIAL_PHONEMES, SPECIAL_TEXT, TRANSITION_TEXT,
|
| 68 |
+
detect_transition_segment, detect_inter_chapter_specials,
|
| 69 |
+
)
|
| 70 |
basmala_already_detected = any(
|
| 71 |
r[2] in ("Basmala", "Isti'adha+Basmala") for r in (special_results or [])
|
| 72 |
)
|
|
|
|
| 74 |
|
| 75 |
special_merges = 0
|
| 76 |
|
| 77 |
+
# Transition segment state
|
| 78 |
+
transition_mode = False
|
| 79 |
+
transition_skips = 0
|
| 80 |
+
tahmeed_merge_skip = 0
|
| 81 |
+
merged_into = {} # {consumed_idx: target_idx}
|
| 82 |
+
|
| 83 |
# Gap tracking (initialized here so inline chapter-transition checks can add entries)
|
| 84 |
gap_segments = set()
|
| 85 |
transition_expected_pointer = -1 # -1 = no pending check
|
|
|
|
| 121 |
skip_count -= 1
|
| 122 |
continue
|
| 123 |
|
| 124 |
+
# Handle segments consumed by Tahmeed merge (sami'a + rabbana in separate segments)
|
| 125 |
+
if tahmeed_merge_skip > 0:
|
| 126 |
+
# This segment's audio was merged into the previous Tahmeed segment
|
| 127 |
+
results.append(("", 0.0, ""))
|
| 128 |
+
word_indices.append(None)
|
| 129 |
+
tahmeed_merge_skip -= 1
|
| 130 |
+
transition_skips += 1
|
| 131 |
+
continue
|
| 132 |
+
|
| 133 |
segment_idx = first_quran_idx + i + 1 # 1-indexed for display
|
| 134 |
segments_attempted += 1
|
| 135 |
|
| 136 |
+
# Transition mode: keep checking for transitions before trying alignment
|
| 137 |
+
if transition_mode:
|
| 138 |
+
trans_name, trans_conf = detect_transition_segment(asr_phonemes)
|
| 139 |
+
if trans_name:
|
| 140 |
+
print(f" [TRANSITION-MODE] Segment {segment_idx}: {trans_name} (conf={trans_conf:.2f})")
|
| 141 |
+
results.append((TRANSITION_TEXT[trans_name], trans_conf, trans_name))
|
| 142 |
+
word_indices.append(None)
|
| 143 |
+
transition_skips += 1
|
| 144 |
+
|
| 145 |
+
# Tahmeed peek-ahead for merge
|
| 146 |
+
if trans_name == "Tahmeed":
|
| 147 |
+
next_abs = first_quran_idx + i + 1
|
| 148 |
+
if next_abs < len(phoneme_texts) and phoneme_texts[next_abs]:
|
| 149 |
+
resp_name, resp_conf = detect_transition_segment(
|
| 150 |
+
phoneme_texts[next_abs], allowed={"Tahmeed"})
|
| 151 |
+
if resp_name:
|
| 152 |
+
merged_into[next_abs] = first_quran_idx + i
|
| 153 |
+
tahmeed_merge_skip = 1
|
| 154 |
+
print(f" [TAHMEED-MERGE] Next segment merged into Tahmeed")
|
| 155 |
+
|
| 156 |
+
continue
|
| 157 |
+
else:
|
| 158 |
+
# Exit transition mode, global reanchor
|
| 159 |
+
transition_mode = False
|
| 160 |
+
print(f" [TRANSITION-MODE] Exiting at segment {segment_idx}, running global reanchor...")
|
| 161 |
+
remaining_idx = first_quran_idx + i
|
| 162 |
+
remaining_texts = phoneme_texts[remaining_idx:]
|
| 163 |
+
if remaining_texts:
|
| 164 |
+
reanchor_surah, reanchor_ayah = find_anchor_by_voting(
|
| 165 |
+
remaining_texts, get_ngram_index(), ANCHOR_SEGMENTS,
|
| 166 |
+
)
|
| 167 |
+
if reanchor_surah > 0:
|
| 168 |
+
if reanchor_surah != detected_surah:
|
| 169 |
+
detected_surah = reanchor_surah
|
| 170 |
+
chapter_ref = get_chapter_reference(detected_surah)
|
| 171 |
+
pointer = verse_to_word_index(chapter_ref, reanchor_ayah)
|
| 172 |
+
transition_expected_pointer = pointer
|
| 173 |
+
print(f" [GLOBAL-REANCHOR] Jumped to Surah {detected_surah}, "
|
| 174 |
+
f"Ayah {reanchor_ayah}, word {pointer}")
|
| 175 |
+
consecutive_failures = 0
|
| 176 |
+
# Fall through to normal alignment below
|
| 177 |
+
|
| 178 |
alignment, timing = align_segment(asr_phonemes, chapter_ref, pointer, segment_idx)
|
| 179 |
num_segments += 1
|
| 180 |
|
|
|
|
| 186 |
|
| 187 |
# Chapter transition: pointer past end of chapter
|
| 188 |
if alignment is None and pointer >= chapter_ref.num_words:
|
|
|
|
| 189 |
remaining_phonemes = phoneme_texts[first_quran_idx + i:]
|
| 190 |
+
amin_consumed = 0
|
| 191 |
+
|
| 192 |
+
if chapter_ref.surah == 1:
|
| 193 |
+
# Check for Amin after Al-Fatiha before inter-chapter specials
|
| 194 |
+
amin_name, amin_conf = detect_transition_segment(
|
| 195 |
+
asr_phonemes, allowed={"Amin"})
|
| 196 |
+
if amin_name:
|
| 197 |
+
print(f" [AMIN] Detected after Surah 1 (conf={amin_conf:.2f})")
|
| 198 |
+
results.append((TRANSITION_TEXT["Amin"], amin_conf, "Amin"))
|
| 199 |
+
word_indices.append(None)
|
| 200 |
+
transition_skips += 1
|
| 201 |
+
amin_consumed = 1
|
| 202 |
+
# Re-slice remaining phonemes to start after Amin
|
| 203 |
+
remaining_phonemes = phoneme_texts[first_quran_idx + i + 1:]
|
| 204 |
+
|
| 205 |
inter_specials, num_consumed = detect_inter_chapter_specials(remaining_phonemes)
|
| 206 |
|
| 207 |
if chapter_ref.surah == 1:
|
|
|
|
| 209 |
print(f" [CHAPTER-END] Surah 1 complete at segment {segment_idx}, "
|
| 210 |
f"running global reanchor...")
|
| 211 |
|
| 212 |
+
# Use segments after Amin + specials for anchor voting
|
| 213 |
+
anchor_offset = first_quran_idx + i + amin_consumed + num_consumed
|
| 214 |
anchor_remaining = phoneme_texts[anchor_offset:]
|
| 215 |
|
| 216 |
reanchor_surah, reanchor_ayah = find_anchor_by_voting(
|
|
|
|
| 246 |
detected_surah = next_surah
|
| 247 |
consecutive_failures = 0
|
| 248 |
|
| 249 |
+
if amin_consumed > 0:
|
| 250 |
+
# Current segment was Amin (already appended above).
|
| 251 |
+
# Queue inter-chapter specials for subsequent segments.
|
| 252 |
+
has_basmala = any(s[2] in ("Basmala", "Isti'adha+Basmala") for s in inter_specials)
|
| 253 |
+
is_first_after_transition = not has_basmala
|
| 254 |
+
if num_consumed > 0:
|
| 255 |
+
pending_specials = list(inter_specials)
|
| 256 |
+
skip_count = num_consumed
|
| 257 |
+
else:
|
| 258 |
+
is_first_after_transition = True
|
| 259 |
+
continue
|
| 260 |
+
|
| 261 |
if num_consumed > 0:
|
| 262 |
has_basmala = any(s[2] in ("Basmala", "Isti'adha+Basmala") for s in inter_specials)
|
| 263 |
is_first_after_transition = not has_basmala
|
|
|
|
| 324 |
_check_transition_gap(alignment.start_word_idx)
|
| 325 |
segments_passed += 1
|
| 326 |
else:
|
| 327 |
+
# === Check for transition segment before retry tiers ===
|
| 328 |
+
trans_name, trans_conf = detect_transition_segment(asr_phonemes)
|
| 329 |
+
if trans_name:
|
| 330 |
+
print(f" [TRANSITION] Segment {segment_idx}: {trans_name} (conf={trans_conf:.2f})")
|
| 331 |
+
result = (TRANSITION_TEXT[trans_name], trans_conf, trans_name)
|
| 332 |
+
word_indices.append(None)
|
| 333 |
+
transition_skips += 1
|
| 334 |
+
transition_mode = True
|
| 335 |
+
|
| 336 |
+
# Tahmeed peek-ahead for merge
|
| 337 |
+
if trans_name == "Tahmeed":
|
| 338 |
+
next_abs = first_quran_idx + i + 1
|
| 339 |
+
if next_abs < len(phoneme_texts) and phoneme_texts[next_abs]:
|
| 340 |
+
resp_name, resp_conf = detect_transition_segment(
|
| 341 |
+
phoneme_texts[next_abs], allowed={"Tahmeed"})
|
| 342 |
+
if resp_name:
|
| 343 |
+
merged_into[next_abs] = first_quran_idx + i
|
| 344 |
+
tahmeed_merge_skip = 1
|
| 345 |
+
print(f" [TAHMEED-MERGE] Next segment merged into Tahmeed")
|
| 346 |
+
|
| 347 |
+
results.append(result)
|
| 348 |
+
continue
|
| 349 |
+
|
| 350 |
# === Graduated retry ===
|
| 351 |
# Tier 1: expanded window, same threshold
|
| 352 |
tier1_attempts += 1
|
|
|
|
| 491 |
"segments_attempted": segments_attempted,
|
| 492 |
"segments_passed": segments_passed,
|
| 493 |
"special_merges": special_merges,
|
| 494 |
+
"transition_skips": transition_skips,
|
| 495 |
}
|
| 496 |
else:
|
| 497 |
profiling = {
|
|
|
|
| 506 |
"segments_attempted": segments_attempted,
|
| 507 |
"segments_passed": segments_passed,
|
| 508 |
"special_merges": special_merges,
|
| 509 |
+
"transition_skips": transition_skips,
|
| 510 |
}
|
| 511 |
|
| 512 |
+
return results, profiling, gap_segments, merged_into
|
src/alignment/special_segments.py
CHANGED
|
@@ -16,7 +16,7 @@ from typing import List, Tuple, Optional
|
|
| 16 |
# Constants
|
| 17 |
# =============================================================================
|
| 18 |
|
| 19 |
-
from config import MAX_SPECIAL_EDIT_DISTANCE
|
| 20 |
|
| 21 |
# Special phoneme sequences
|
| 22 |
SPECIAL_PHONEMES = {
|
|
@@ -40,6 +40,66 @@ SPECIAL_TEXT = {
|
|
| 40 |
"Basmala": "بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيم",
|
| 41 |
}
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
# =============================================================================
|
| 45 |
# Levenshtein Distance
|
|
@@ -141,8 +201,25 @@ def detect_special_segments(
|
|
| 141 |
|
| 142 |
special_results: List[Tuple[str, float, str]] = []
|
| 143 |
|
| 144 |
-
#
|
|
|
|
|
|
|
|
|
|
| 145 |
seg0_phonemes = phoneme_texts[0] if phoneme_texts[0] else []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
# ==========================================================================
|
| 148 |
# 1. Try COMBINED (Isti'adha + Basmala in one segment)
|
|
@@ -152,82 +229,88 @@ def detect_special_segments(
|
|
| 152 |
if combined_dist <= MAX_SPECIAL_EDIT_DISTANCE:
|
| 153 |
print(f"[SPECIAL] Combined Isti'adha+Basmala detected (dist={combined_dist:.2f})")
|
| 154 |
|
| 155 |
-
# Split
|
| 156 |
-
seg = vad_segments[
|
| 157 |
-
audio = segment_audios[
|
| 158 |
mid_time = (seg.start_time + seg.end_time) / 2.0
|
| 159 |
mid_sample = max(1, len(audio) // 2)
|
| 160 |
|
| 161 |
-
#
|
| 162 |
-
new_vads = [
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
]
|
| 170 |
|
| 171 |
# Add remaining segments with reindexed segment_idx
|
| 172 |
-
for
|
| 173 |
new_vads.append(VadSegment(
|
| 174 |
start_time=vs.start_time,
|
| 175 |
end_time=vs.end_time,
|
| 176 |
-
segment_idx=
|
| 177 |
))
|
| 178 |
-
new_audios.extend(segment_audios[1:])
|
| 179 |
|
| 180 |
# Special results for both (confidence = 1 - distance)
|
| 181 |
confidence = 1.0 - combined_dist
|
| 182 |
-
special_results
|
| 183 |
(SPECIAL_TEXT["Isti'adha"], confidence, "Isti'adha"),
|
| 184 |
(SPECIAL_TEXT["Basmala"], confidence, "Basmala"),
|
| 185 |
-
]
|
| 186 |
|
| 187 |
-
return new_vads, new_audios, special_results, 2
|
| 188 |
|
| 189 |
# ==========================================================================
|
| 190 |
-
# 2. Try Isti'adha on
|
| 191 |
# ==========================================================================
|
| 192 |
istiadha_dist = phoneme_edit_distance(seg0_phonemes, SPECIAL_PHONEMES["Isti'adha"])
|
| 193 |
|
| 194 |
if istiadha_dist <= MAX_SPECIAL_EDIT_DISTANCE:
|
| 195 |
-
print(f"[SPECIAL] Isti'adha detected on segment
|
| 196 |
special_results.append(
|
| 197 |
(SPECIAL_TEXT["Isti'adha"], 1.0 - istiadha_dist, "Isti'adha")
|
| 198 |
)
|
| 199 |
|
| 200 |
-
# Try Basmala on
|
| 201 |
-
|
| 202 |
-
|
|
|
|
| 203 |
basmala_dist = phoneme_edit_distance(seg1_phonemes, SPECIAL_PHONEMES["Basmala"])
|
| 204 |
|
| 205 |
if basmala_dist <= MAX_SPECIAL_EDIT_DISTANCE:
|
| 206 |
-
print(f"[SPECIAL] Basmala detected on segment
|
| 207 |
special_results.append(
|
| 208 |
(SPECIAL_TEXT["Basmala"], 1.0 - basmala_dist, "Basmala")
|
| 209 |
)
|
| 210 |
-
return vad_segments, segment_audios, special_results, 2
|
| 211 |
else:
|
| 212 |
-
print(f"[SPECIAL] No Basmala on segment
|
| 213 |
|
| 214 |
-
return vad_segments, segment_audios, special_results, 1
|
| 215 |
|
| 216 |
# ==========================================================================
|
| 217 |
-
# 3. Try Basmala on
|
| 218 |
# ==========================================================================
|
| 219 |
basmala_dist = phoneme_edit_distance(seg0_phonemes, SPECIAL_PHONEMES["Basmala"])
|
| 220 |
|
| 221 |
if basmala_dist <= MAX_SPECIAL_EDIT_DISTANCE:
|
| 222 |
-
print(f"[SPECIAL] Basmala detected on segment
|
| 223 |
special_results.append(
|
| 224 |
(SPECIAL_TEXT["Basmala"], 1.0 - basmala_dist, "Basmala")
|
| 225 |
)
|
| 226 |
-
return vad_segments, segment_audios, special_results, 1
|
| 227 |
|
| 228 |
# ==========================================================================
|
| 229 |
-
# 4. No specials detected
|
| 230 |
# ==========================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
print(f"[SPECIAL] No special segments detected "
|
| 232 |
f"(istiadha={istiadha_dist:.2f}, basmala={basmala_dist:.2f})")
|
| 233 |
|
|
@@ -293,3 +376,57 @@ def detect_inter_chapter_specials(
|
|
| 293 |
print(f"[INTER-CHAPTER] No special segments detected "
|
| 294 |
f"(istiadha={istiadha_dist:.2f}, basmala={basmala_dist:.2f})")
|
| 295 |
return [], 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
# Constants
|
| 17 |
# =============================================================================
|
| 18 |
|
| 19 |
+
from config import MAX_SPECIAL_EDIT_DISTANCE, MAX_TRANSITION_EDIT_DISTANCE
|
| 20 |
|
| 21 |
# Special phoneme sequences
|
| 22 |
SPECIAL_PHONEMES = {
|
|
|
|
| 40 |
"Basmala": "بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيم",
|
| 41 |
}
|
| 42 |
|
| 43 |
+
# Transition phoneme sequences (non-Quranic phrases within recitations)
|
| 44 |
+
TRANSITION_PHONEMES = {
|
| 45 |
+
# آمِين — not in Quran, manually constructed (5 phonemes)
|
| 46 |
+
"Amin": ["ʔ", "a:", "m", "i:", "n"],
|
| 47 |
+
|
| 48 |
+
# اللَّهُ أَكْبَر — standalone, hamza wasl pronounced (12 phonemes)
|
| 49 |
+
# Heavy lam (lˤlˤ) because start of utterance → fatha context
|
| 50 |
+
"Takbir": [
|
| 51 |
+
"ʔ", "a", "lˤlˤ", "aˤ:", "h", "u",
|
| 52 |
+
"ʔ", "a", "k", "b", "a", "rˤ",
|
| 53 |
+
],
|
| 54 |
+
|
| 55 |
+
# اللَّهُ أَكْبَر اللَّهُ أَكْبَر — repeated twice in one segment (24 phonemes)
|
| 56 |
+
# Some reciters say Takbir twice. Internal comparison only — display text stays single.
|
| 57 |
+
"Takbir_double": [
|
| 58 |
+
"ʔ", "a", "lˤlˤ", "aˤ:", "h", "u",
|
| 59 |
+
"ʔ", "a", "k", "b", "a", "rˤ",
|
| 60 |
+
"ʔ", "a", "lˤlˤ", "aˤ:", "h", "u",
|
| 61 |
+
"ʔ", "a", "k", "b", "a", "rˤ",
|
| 62 |
+
],
|
| 63 |
+
|
| 64 |
+
# سَمِعَ اللَّهُ لِمَنْ حَمِدَه — waqf on final word (22 phonemes)
|
| 65 |
+
# Heavy lam after fatha of سَمِعَ; izhar noon before ح (throat letter)
|
| 66 |
+
"Tahmeed": [
|
| 67 |
+
"s", "a", "m", "i", "ʕ", "a",
|
| 68 |
+
"lˤlˤ", "aˤ:", "h", "u",
|
| 69 |
+
"l", "i", "m", "a", "n",
|
| 70 |
+
"ħ", "a", "m", "i", "d", "a", "h",
|
| 71 |
+
],
|
| 72 |
+
|
| 73 |
+
# سَمِعَ اللَّهُ لِمَنْ حَمِدَهُ رَبَّنَا وَلَكَ الْحَمْد — both in one segment (39 phonemes)
|
| 74 |
+
# Connected speech: هُ keeps damma (no waqf mid-phrase), waqf on الحمد
|
| 75 |
+
"Tahmeed_combined": [
|
| 76 |
+
"s", "a", "m", "i", "ʕ", "a",
|
| 77 |
+
"lˤlˤ", "aˤ:", "h", "u",
|
| 78 |
+
"l", "i", "m", "a", "n",
|
| 79 |
+
"ħ", "a", "m", "i", "d", "a", "h", "u",
|
| 80 |
+
"rˤ", "aˤ", "bb", "a", "n", "a:",
|
| 81 |
+
"w", "a", "l", "a", "k", "a",
|
| 82 |
+
"l", "ħ", "a", "m", "d",
|
| 83 |
+
],
|
| 84 |
+
|
| 85 |
+
# رَبَّنَا وَلَكَ الْحَمْد — response only, for merge detection (17 phonemes)
|
| 86 |
+
# Heavy ra with emphatic fatha; ح is lunar so lam of ال is pronounced
|
| 87 |
+
"Tahmeed_response": [
|
| 88 |
+
"rˤ", "aˤ", "bb", "a", "n", "a:",
|
| 89 |
+
"w", "a", "l", "a", "k", "a",
|
| 90 |
+
"l", "ħ", "a", "m", "d",
|
| 91 |
+
],
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
TRANSITION_TEXT = {
|
| 95 |
+
"Amin": "آمِين",
|
| 96 |
+
"Takbir": "اللَّهُ أَكْبَر",
|
| 97 |
+
"Tahmeed": "سَمِعَ اللَّهُ لِمَنْ حَمِدَه",
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
# All special segment reference names (for unified rendering)
|
| 101 |
+
ALL_SPECIAL_REFS = {"Basmala", "Isti'adha", "Isti'adha+Basmala", "Amin", "Takbir", "Tahmeed"}
|
| 102 |
+
|
| 103 |
|
| 104 |
# =============================================================================
|
| 105 |
# Levenshtein Distance
|
|
|
|
| 201 |
|
| 202 |
special_results: List[Tuple[str, float, str]] = []
|
| 203 |
|
| 204 |
+
# ==========================================================================
|
| 205 |
+
# 0. Check segment 0 for Takbir (recitation opener before Isti'adha/Basmala)
|
| 206 |
+
# ==========================================================================
|
| 207 |
+
takbir_offset = 0
|
| 208 |
seg0_phonemes = phoneme_texts[0] if phoneme_texts[0] else []
|
| 209 |
+
takbir_name, takbir_conf = detect_transition_segment(seg0_phonemes, allowed={"Takbir"})
|
| 210 |
+
if takbir_name:
|
| 211 |
+
print(f"[SPECIAL] Takbir detected on segment 0 (conf={takbir_conf:.2f})")
|
| 212 |
+
special_results.append((TRANSITION_TEXT["Takbir"], takbir_conf, "Takbir"))
|
| 213 |
+
takbir_offset = 1
|
| 214 |
+
# Re-point to the next segment for Isti'adha/Basmala detection
|
| 215 |
+
if len(phoneme_texts) > 1:
|
| 216 |
+
seg0_phonemes = phoneme_texts[1] if phoneme_texts[1] else []
|
| 217 |
+
else:
|
| 218 |
+
return vad_segments, segment_audios, special_results, takbir_offset
|
| 219 |
+
|
| 220 |
+
# seg0_phonemes now points to the first non-Takbir segment
|
| 221 |
+
# (segment 0 if no Takbir, segment 1 if Takbir detected)
|
| 222 |
+
check_idx = takbir_offset # Index into phoneme_texts for Isti'adha/Basmala detection
|
| 223 |
|
| 224 |
# ==========================================================================
|
| 225 |
# 1. Try COMBINED (Isti'adha + Basmala in one segment)
|
|
|
|
| 229 |
if combined_dist <= MAX_SPECIAL_EDIT_DISTANCE:
|
| 230 |
print(f"[SPECIAL] Combined Isti'adha+Basmala detected (dist={combined_dist:.2f})")
|
| 231 |
|
| 232 |
+
# Split the combined segment by midpoint
|
| 233 |
+
seg = vad_segments[check_idx]
|
| 234 |
+
audio = segment_audios[check_idx]
|
| 235 |
mid_time = (seg.start_time + seg.end_time) / 2.0
|
| 236 |
mid_sample = max(1, len(audio) // 2)
|
| 237 |
|
| 238 |
+
# Rebuild vad/audio lists: keep segments before check_idx, split, then rest
|
| 239 |
+
new_vads = list(vad_segments[:check_idx])
|
| 240 |
+
new_audios = list(segment_audios[:check_idx])
|
| 241 |
+
|
| 242 |
+
split_start_idx = len(new_vads)
|
| 243 |
+
new_vads.append(VadSegment(start_time=seg.start_time, end_time=mid_time, segment_idx=split_start_idx))
|
| 244 |
+
new_vads.append(VadSegment(start_time=mid_time, end_time=seg.end_time, segment_idx=split_start_idx + 1))
|
| 245 |
+
new_audios.append(audio[:mid_sample])
|
| 246 |
+
new_audios.append(audio[mid_sample:])
|
| 247 |
|
| 248 |
# Add remaining segments with reindexed segment_idx
|
| 249 |
+
for ii, vs in enumerate(vad_segments[check_idx + 1:], start=split_start_idx + 2):
|
| 250 |
new_vads.append(VadSegment(
|
| 251 |
start_time=vs.start_time,
|
| 252 |
end_time=vs.end_time,
|
| 253 |
+
segment_idx=ii
|
| 254 |
))
|
| 255 |
+
new_audios.extend(segment_audios[check_idx + 1:])
|
| 256 |
|
| 257 |
# Special results for both (confidence = 1 - distance)
|
| 258 |
confidence = 1.0 - combined_dist
|
| 259 |
+
special_results.extend([
|
| 260 |
(SPECIAL_TEXT["Isti'adha"], confidence, "Isti'adha"),
|
| 261 |
(SPECIAL_TEXT["Basmala"], confidence, "Basmala"),
|
| 262 |
+
])
|
| 263 |
|
| 264 |
+
return new_vads, new_audios, special_results, takbir_offset + 2
|
| 265 |
|
| 266 |
# ==========================================================================
|
| 267 |
+
# 2. Try Isti'adha on the check segment
|
| 268 |
# ==========================================================================
|
| 269 |
istiadha_dist = phoneme_edit_distance(seg0_phonemes, SPECIAL_PHONEMES["Isti'adha"])
|
| 270 |
|
| 271 |
if istiadha_dist <= MAX_SPECIAL_EDIT_DISTANCE:
|
| 272 |
+
print(f"[SPECIAL] Isti'adha detected on segment {check_idx} (dist={istiadha_dist:.2f})")
|
| 273 |
special_results.append(
|
| 274 |
(SPECIAL_TEXT["Isti'adha"], 1.0 - istiadha_dist, "Isti'adha")
|
| 275 |
)
|
| 276 |
|
| 277 |
+
# Try Basmala on the next segment
|
| 278 |
+
next_idx = check_idx + 1
|
| 279 |
+
if next_idx < len(phoneme_texts) and phoneme_texts[next_idx]:
|
| 280 |
+
seg1_phonemes = phoneme_texts[next_idx]
|
| 281 |
basmala_dist = phoneme_edit_distance(seg1_phonemes, SPECIAL_PHONEMES["Basmala"])
|
| 282 |
|
| 283 |
if basmala_dist <= MAX_SPECIAL_EDIT_DISTANCE:
|
| 284 |
+
print(f"[SPECIAL] Basmala detected on segment {next_idx} (dist={basmala_dist:.2f})")
|
| 285 |
special_results.append(
|
| 286 |
(SPECIAL_TEXT["Basmala"], 1.0 - basmala_dist, "Basmala")
|
| 287 |
)
|
| 288 |
+
return vad_segments, segment_audios, special_results, takbir_offset + 2
|
| 289 |
else:
|
| 290 |
+
print(f"[SPECIAL] No Basmala on segment {next_idx} (dist={basmala_dist:.2f})")
|
| 291 |
|
| 292 |
+
return vad_segments, segment_audios, special_results, takbir_offset + 1
|
| 293 |
|
| 294 |
# ==========================================================================
|
| 295 |
+
# 3. Try Basmala on the check segment
|
| 296 |
# ==========================================================================
|
| 297 |
basmala_dist = phoneme_edit_distance(seg0_phonemes, SPECIAL_PHONEMES["Basmala"])
|
| 298 |
|
| 299 |
if basmala_dist <= MAX_SPECIAL_EDIT_DISTANCE:
|
| 300 |
+
print(f"[SPECIAL] Basmala detected on segment {check_idx} (dist={basmala_dist:.2f})")
|
| 301 |
special_results.append(
|
| 302 |
(SPECIAL_TEXT["Basmala"], 1.0 - basmala_dist, "Basmala")
|
| 303 |
)
|
| 304 |
+
return vad_segments, segment_audios, special_results, takbir_offset + 1
|
| 305 |
|
| 306 |
# ==========================================================================
|
| 307 |
+
# 4. No specials detected (beyond Takbir if any)
|
| 308 |
# ==========================================================================
|
| 309 |
+
if takbir_offset > 0:
|
| 310 |
+
print(f"[SPECIAL] Only Takbir detected, no Isti'adha/Basmala "
|
| 311 |
+
f"(istiadha={istiadha_dist:.2f}, basmala={basmala_dist:.2f})")
|
| 312 |
+
return vad_segments, segment_audios, special_results, takbir_offset
|
| 313 |
+
|
| 314 |
print(f"[SPECIAL] No special segments detected "
|
| 315 |
f"(istiadha={istiadha_dist:.2f}, basmala={basmala_dist:.2f})")
|
| 316 |
|
|
|
|
| 376 |
print(f"[INTER-CHAPTER] No special segments detected "
|
| 377 |
f"(istiadha={istiadha_dist:.2f}, basmala={basmala_dist:.2f})")
|
| 378 |
return [], 0
|
| 379 |
+
|
| 380 |
+
|
| 381 |
+
# =============================================================================
|
| 382 |
+
# Transition Segment Detection
|
| 383 |
+
# =============================================================================
|
| 384 |
+
|
| 385 |
+
# Mapping from variant names to their base/display name
|
| 386 |
+
_TRANSITION_BASE_NAMES = {
|
| 387 |
+
"Takbir_double": "Takbir",
|
| 388 |
+
"Tahmeed_combined": "Tahmeed",
|
| 389 |
+
"Tahmeed_response": "Tahmeed",
|
| 390 |
+
}
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
def detect_transition_segment(
    asr_phonemes: List[str],
    allowed: Optional[set] = None,
) -> Tuple[Optional[str], float]:
    """Detect a non-Quranic transition phrase in one ASR segment.

    Every entry in TRANSITION_PHONEMES is scored by normalized phoneme edit
    distance against the segment; the single closest entry wins. Variant keys
    (e.g. ``Takbir_double``) are collapsed to their base name via
    ``_TRANSITION_BASE_NAMES`` before filtering and before being returned, so
    variants influence matching only, never display.

    Args:
        asr_phonemes: ASR output phoneme sequence for one segment.
        allowed: Optional set of base names to restrict detection to
            (e.g. ``{"Amin"}`` to only check Amin).

    Returns:
        ``(name, confidence)`` — the base transition name (or ``None`` when
        nothing scores under ``MAX_TRANSITION_EDIT_DISTANCE``) and
        ``1 - normalized_edit_distance`` as confidence (``0.0`` on no match).
    """
    if not asr_phonemes:
        return None, 0.0

    # Score every eligible reference; keep (distance, base_name) pairs.
    scored = []
    for key, ref_phonemes in TRANSITION_PHONEMES.items():
        base = _TRANSITION_BASE_NAMES.get(key, key)
        if allowed is not None and base not in allowed:
            continue
        scored.append((phoneme_edit_distance(asr_phonemes, ref_phonemes), base))

    if scored:
        # min() keeps the first entry on ties, matching a strict `<` scan.
        dist, name = min(scored, key=lambda pair: pair[0])
        if dist <= MAX_TRANSITION_EDIT_DISTANCE:
            return name, 1.0 - dist

    return None, 0.0
|
src/core/segment_types.py
CHANGED
|
@@ -67,6 +67,7 @@ class ProfilingData:
|
|
| 67 |
segments_attempted: int = 0
|
| 68 |
segments_passed: int = 0
|
| 69 |
special_merges: int = 0
|
|
|
|
| 70 |
# Result building profiling
|
| 71 |
result_build_time: float = 0.0 # Total result building time
|
| 72 |
result_audio_encode_time: float = 0.0 # Audio-to-data-URL encoding
|
|
@@ -140,6 +141,7 @@ class ProfilingData:
|
|
| 140 |
f" Tier 2 Retries: {self.tier2_passed}/{self.tier2_attempts} passed segments: {t2_segs}",
|
| 141 |
f" Reanchors (consec failures): {self.consec_reanchors}",
|
| 142 |
f" Special Merges: {self.special_merges}",
|
|
|
|
| 143 |
"-" * 60,
|
| 144 |
]
|
| 145 |
profiled_sum = (self.resample_time + self.vad_wall_time + self.asr_time
|
|
|
|
| 67 |
segments_attempted: int = 0
|
| 68 |
segments_passed: int = 0
|
| 69 |
special_merges: int = 0
|
| 70 |
+
transition_skips: int = 0
|
| 71 |
# Result building profiling
|
| 72 |
result_build_time: float = 0.0 # Total result building time
|
| 73 |
result_audio_encode_time: float = 0.0 # Audio-to-data-URL encoding
|
|
|
|
| 141 |
f" Tier 2 Retries: {self.tier2_passed}/{self.tier2_attempts} passed segments: {t2_segs}",
|
| 142 |
f" Reanchors (consec failures): {self.consec_reanchors}",
|
| 143 |
f" Special Merges: {self.special_merges}",
|
| 144 |
+
f" Transition Skips: {self.transition_skips}",
|
| 145 |
"-" * 60,
|
| 146 |
]
|
| 147 |
profiled_sum = (self.resample_time + self.vad_wall_time + self.asr_time
|
src/core/zero_gpu.py
CHANGED
|
@@ -5,7 +5,6 @@ local or non-ZeroGPU environments.
|
|
| 5 |
|
| 6 |
import os
|
| 7 |
import re
|
| 8 |
-
import time as _time
|
| 9 |
import threading
|
| 10 |
from typing import Callable, TypeVar
|
| 11 |
from functools import wraps
|
|
@@ -18,27 +17,6 @@ ZERO_GPU_AVAILABLE = False
|
|
| 18 |
# Per-thread (per-request) GPU state so concurrent requests don't interfere
|
| 19 |
_request_state = threading.local()
|
| 20 |
|
| 21 |
-
# ---------------------------------------------------------------------------
|
| 22 |
-
# Process-global CUDA health tracking.
|
| 23 |
-
# Unlike _request_state (thread-local, per-request), this affects ALL threads.
|
| 24 |
-
# When CUDA errors occur we mark it unhealthy so no request attempts GPU
|
| 25 |
-
# until the cooldown expires — preventing further poisoning of torch's
|
| 26 |
-
# internal CUDA state.
|
| 27 |
-
# ---------------------------------------------------------------------------
|
| 28 |
-
_cuda_health_lock = threading.Lock()
|
| 29 |
-
_cuda_healthy = True
|
| 30 |
-
_cuda_unhealthy_since = 0.0 # timestamp of first CUDA failure
|
| 31 |
-
_consecutive_cuda_failures = 0 # track persistent failures across cooldowns
|
| 32 |
-
|
| 33 |
-
# ---------------------------------------------------------------------------
|
| 34 |
-
# SDK worker scheduling failure tracking (separate from CUDA hardware errors).
|
| 35 |
-
# Worker errors are transient — the assigned GPU UUID isn't available when
|
| 36 |
-
# worker_init runs. Retrying gets a different GPU via a new schedule() call.
|
| 37 |
-
# ---------------------------------------------------------------------------
|
| 38 |
-
_sdk_worker_lock = threading.Lock()
|
| 39 |
-
_sdk_worker_consecutive_failures = 0
|
| 40 |
-
_sdk_worker_cooldown_until = 0.0 # timestamp when cooldown expires
|
| 41 |
-
|
| 42 |
# ---------------------------------------------------------------------------
|
| 43 |
# Shared RLock for model device transitions AND inference.
|
| 44 |
# RLock because ensure_models_on_gpu() -> move_phoneme_asr_to_gpu() is a
|
|
@@ -56,17 +34,6 @@ _active_gpu_leases = 0
|
|
| 56 |
_models_stale = False # Set True at lease end; drained at next lease start
|
| 57 |
|
| 58 |
|
| 59 |
-
# CUDA error patterns that should trigger CPU fallback (not re-raise).
|
| 60 |
-
# These indicate hardware/driver issues where retrying GPU won't help.
|
| 61 |
-
_CUDA_ERROR_PATTERNS = (
|
| 62 |
-
"no cuda gpus are available",
|
| 63 |
-
"cuda must not be initialized",
|
| 64 |
-
"cuda error", "cuda out of memory",
|
| 65 |
-
"cuda driver", "cuda runtime",
|
| 66 |
-
"device-side assert", "cublas", "cudnn error", "nccl",
|
| 67 |
-
"gpu task aborted", # ZeroGPU SDK wraps CUDA errors with this message
|
| 68 |
-
)
|
| 69 |
-
|
| 70 |
try:
|
| 71 |
import spaces # type: ignore
|
| 72 |
|
|
@@ -106,11 +73,6 @@ def _exit_gpu_lease():
|
|
| 106 |
_active_gpu_leases = max(0, _active_gpu_leases - 1)
|
| 107 |
|
| 108 |
|
| 109 |
-
def is_gpu_lease_active():
|
| 110 |
-
"""Check if any thread currently holds a GPU lease."""
|
| 111 |
-
return _active_gpu_leases > 0
|
| 112 |
-
|
| 113 |
-
|
| 114 |
# =========================================================================
|
| 115 |
# Per-thread state helpers
|
| 116 |
# =========================================================================
|
|
@@ -147,128 +109,6 @@ def force_cpu_mode():
|
|
| 147 |
_request_state.user_forced_cpu = True
|
| 148 |
|
| 149 |
|
| 150 |
-
# =========================================================================
|
| 151 |
-
# Process-level CUDA health
|
| 152 |
-
# =========================================================================
|
| 153 |
-
|
| 154 |
-
def _is_cuda_healthy() -> bool:
|
| 155 |
-
"""Check if CUDA is considered healthy at the process level.
|
| 156 |
-
|
| 157 |
-
If unhealthy, checks whether the cooldown has expired.
|
| 158 |
-
If expired, allows exactly one retry by marking healthy again.
|
| 159 |
-
"""
|
| 160 |
-
global _cuda_healthy
|
| 161 |
-
|
| 162 |
-
if _cuda_healthy:
|
| 163 |
-
return True
|
| 164 |
-
|
| 165 |
-
from config import CUDA_COOLDOWN_SECONDS
|
| 166 |
-
with _cuda_health_lock:
|
| 167 |
-
if _cuda_healthy:
|
| 168 |
-
return True # Another thread already recovered
|
| 169 |
-
elapsed = _time.time() - _cuda_unhealthy_since
|
| 170 |
-
if elapsed >= CUDA_COOLDOWN_SECONDS:
|
| 171 |
-
print(f"[CUDA HEALTH] Cooldown expired ({elapsed:.0f}s), allowing GPU retry")
|
| 172 |
-
_try_reset_cuda_state()
|
| 173 |
-
_cuda_healthy = True
|
| 174 |
-
return True
|
| 175 |
-
return False
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
def _mark_cuda_unhealthy():
|
| 179 |
-
"""Mark CUDA as unhealthy process-wide after a CUDA error.
|
| 180 |
-
|
| 181 |
-
All subsequent requests will skip GPU until cooldown expires.
|
| 182 |
-
After MAX_CUDA_FAILURES consecutive failures, restarts the process
|
| 183 |
-
(HF Spaces will restart the container automatically).
|
| 184 |
-
"""
|
| 185 |
-
global _cuda_healthy, _cuda_unhealthy_since, _consecutive_cuda_failures
|
| 186 |
-
with _cuda_health_lock:
|
| 187 |
-
_consecutive_cuda_failures += 1
|
| 188 |
-
if _cuda_healthy:
|
| 189 |
-
_cuda_unhealthy_since = _time.time()
|
| 190 |
-
_cuda_healthy = False
|
| 191 |
-
print(f"[CUDA HEALTH] Marked CUDA unhealthy (consecutive failures: {_consecutive_cuda_failures})")
|
| 192 |
-
|
| 193 |
-
from config import MAX_CUDA_FAILURES
|
| 194 |
-
if _consecutive_cuda_failures >= MAX_CUDA_FAILURES:
|
| 195 |
-
print(f"[CUDA HEALTH] {_consecutive_cuda_failures} consecutive CUDA failures — "
|
| 196 |
-
"process permanently poisoned, forcing restart")
|
| 197 |
-
os._exit(1)
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
def _mark_cuda_success():
|
| 201 |
-
"""Reset consecutive failure counter after a successful GPU operation."""
|
| 202 |
-
global _consecutive_cuda_failures
|
| 203 |
-
with _cuda_health_lock:
|
| 204 |
-
if _consecutive_cuda_failures > 0:
|
| 205 |
-
print(f"[CUDA HEALTH] GPU succeeded, resetting failure counter (was {_consecutive_cuda_failures})")
|
| 206 |
-
_consecutive_cuda_failures = 0
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
def _try_reset_cuda_state():
|
| 210 |
-
"""Reset torch.cuda internal state so a fresh GPU attempt can re-init cleanly.
|
| 211 |
-
|
| 212 |
-
After CUDA poisoning, torch.cuda._initialized remains True even though
|
| 213 |
-
the underlying CUDA context is dead. Resetting it allows the next
|
| 214 |
-
torch.cuda call to re-initialize from scratch.
|
| 215 |
-
"""
|
| 216 |
-
try:
|
| 217 |
-
import torch.cuda as _cuda
|
| 218 |
-
if getattr(_cuda, '_initialized', False):
|
| 219 |
-
_cuda._initialized = False
|
| 220 |
-
print("[CUDA HEALTH] Reset torch.cuda._initialized")
|
| 221 |
-
if hasattr(_cuda, '_queued_calls'):
|
| 222 |
-
_cuda._queued_calls.clear()
|
| 223 |
-
except Exception as e:
|
| 224 |
-
print(f"[CUDA HEALTH] CUDA state reset failed (non-fatal): {e}")
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
# =========================================================================
|
| 228 |
-
# SDK worker scheduling health
|
| 229 |
-
# =========================================================================
|
| 230 |
-
|
| 231 |
-
def _record_sdk_worker_failure():
|
| 232 |
-
"""Record a failed SDK worker retry. After threshold → enter cooldown."""
|
| 233 |
-
global _sdk_worker_consecutive_failures, _sdk_worker_cooldown_until
|
| 234 |
-
from config import SDK_WORKER_COOLDOWN_SECONDS, SDK_WORKER_FAILURE_THRESHOLD
|
| 235 |
-
with _sdk_worker_lock:
|
| 236 |
-
_sdk_worker_consecutive_failures += 1
|
| 237 |
-
if _sdk_worker_consecutive_failures >= SDK_WORKER_FAILURE_THRESHOLD:
|
| 238 |
-
_sdk_worker_cooldown_until = _time.time() + SDK_WORKER_COOLDOWN_SECONDS
|
| 239 |
-
print(f"[GPU] SDK worker: {_sdk_worker_consecutive_failures} consecutive failures, "
|
| 240 |
-
f"cooldown for {SDK_WORKER_COOLDOWN_SECONDS}s")
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
def _reset_sdk_worker_failures():
|
| 244 |
-
"""Reset SDK worker failure counter after a successful GPU operation."""
|
| 245 |
-
global _sdk_worker_consecutive_failures, _sdk_worker_cooldown_until
|
| 246 |
-
with _sdk_worker_lock:
|
| 247 |
-
if _sdk_worker_consecutive_failures > 0:
|
| 248 |
-
print(f"[GPU] SDK worker: GPU succeeded, resetting failure counter "
|
| 249 |
-
f"(was {_sdk_worker_consecutive_failures})")
|
| 250 |
-
_sdk_worker_consecutive_failures = 0
|
| 251 |
-
_sdk_worker_cooldown_until = 0.0
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
def _is_sdk_worker_healthy() -> bool:
|
| 255 |
-
"""Check if SDK worker scheduling is healthy (not in cooldown).
|
| 256 |
-
|
| 257 |
-
Returns True if no cooldown active or cooldown has expired.
|
| 258 |
-
Auto-resets on expiry so the next attempt can retry GPU.
|
| 259 |
-
"""
|
| 260 |
-
global _sdk_worker_cooldown_until
|
| 261 |
-
with _sdk_worker_lock:
|
| 262 |
-
if _sdk_worker_cooldown_until <= 0.0:
|
| 263 |
-
return True
|
| 264 |
-
now = _time.time()
|
| 265 |
-
if now >= _sdk_worker_cooldown_until:
|
| 266 |
-
print("[GPU] SDK worker cooldown expired, allowing GPU retry")
|
| 267 |
-
_sdk_worker_cooldown_until = 0.0
|
| 268 |
-
return True
|
| 269 |
-
return False
|
| 270 |
-
|
| 271 |
-
|
| 272 |
# =========================================================================
|
| 273 |
# Model cleanup helpers
|
| 274 |
# =========================================================================
|
|
@@ -325,6 +165,11 @@ def gpu_with_fallback(duration=60):
|
|
| 325 |
The model_device_lock is held for the ENTIRE GPU lease (inference +
|
| 326 |
cleanup) to prevent concurrent threads from moving models mid-inference.
|
| 327 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 328 |
Usage:
|
| 329 |
@gpu_with_fallback(duration=60)
|
| 330 |
def my_gpu_func(data):
|
|
@@ -341,10 +186,7 @@ def gpu_with_fallback(duration=60):
|
|
| 341 |
with model_device_lock:
|
| 342 |
try:
|
| 343 |
_drain_stale_models()
|
| 344 |
-
|
| 345 |
-
_mark_cuda_success()
|
| 346 |
-
_reset_sdk_worker_failures()
|
| 347 |
-
return result
|
| 348 |
finally:
|
| 349 |
try:
|
| 350 |
_cleanup_after_gpu()
|
|
@@ -373,105 +215,34 @@ def gpu_with_fallback(duration=60):
|
|
| 373 |
print("[GPU] Quota exhausted, using CPU fallback")
|
| 374 |
return func(*args, **kwargs)
|
| 375 |
|
| 376 |
-
#
|
| 377 |
-
# further poisoning of torch's internal CUDA state
|
| 378 |
-
if not _is_cuda_healthy():
|
| 379 |
-
from config import CUDA_COOLDOWN_SECONDS
|
| 380 |
-
remaining = CUDA_COOLDOWN_SECONDS - (_time.time() - _cuda_unhealthy_since)
|
| 381 |
-
print(f"[CUDA HEALTH] CUDA unhealthy, skipping GPU (retry in {remaining:.0f}s)")
|
| 382 |
-
_request_state.gpu_quota_exhausted = True
|
| 383 |
-
try:
|
| 384 |
-
import gradio as gr
|
| 385 |
-
gr.Warning(f"GPU temporarily unavailable — using CPU. Retry in {max(1, int(remaining / 60))}m.")
|
| 386 |
-
except Exception:
|
| 387 |
-
pass
|
| 388 |
-
return func(*args, **kwargs)
|
| 389 |
-
|
| 390 |
-
# If SDK worker scheduling is in cooldown, skip GPU
|
| 391 |
-
if not _is_sdk_worker_healthy():
|
| 392 |
-
print("[GPU] SDK worker in cooldown, skipping GPU")
|
| 393 |
-
_request_state.gpu_quota_exhausted = True
|
| 394 |
-
try:
|
| 395 |
-
import gradio as gr
|
| 396 |
-
gr.Warning("GPU temporarily unavailable — using CPU (slower).")
|
| 397 |
-
except Exception:
|
| 398 |
-
pass
|
| 399 |
-
return func(*args, **kwargs)
|
| 400 |
-
|
| 401 |
-
# Try GPU first
|
| 402 |
try:
|
| 403 |
return gpu_func(*args, **kwargs)
|
| 404 |
except Exception as e:
|
| 405 |
-
|
| 406 |
-
|
|
|
|
|
|
|
| 407 |
is_quota_error = getattr(e, 'title', '') == "ZeroGPU quota exceeded"
|
| 408 |
if not is_quota_error:
|
| 409 |
-
|
| 410 |
-
is_quota_error = 'quota' in err_str and ('exceeded' in err_str or 'exhausted' in err_str)
|
| 411 |
|
| 412 |
if is_quota_error:
|
| 413 |
print(f"[GPU] Quota exceeded (user-level), falling back to CPU: {e}")
|
| 414 |
_request_state.gpu_quota_exhausted = True
|
| 415 |
-
|
| 416 |
-
match = re.search(r'Try again in (\d+:\d{2}:\d{2})', str(e))
|
| 417 |
if match:
|
| 418 |
_request_state.quota_reset_time = match.group(1)
|
| 419 |
-
# NOT setting process-global flag: quota is per-user,
|
| 420 |
-
# other users may still have quota available.
|
| 421 |
-
# Show immediate toast notification
|
| 422 |
try:
|
| 423 |
import gradio as gr
|
| 424 |
reset_time = get_quota_reset_time()
|
| 425 |
reset_msg = f" Resets in {reset_time}." if reset_time else ""
|
| 426 |
gr.Warning(f"GPU quota reached — switching to CPU (slower).{reset_msg}")
|
| 427 |
-
except Exception:
|
| 428 |
-
pass # Not in a Gradio context (e.g., CLI usage)
|
| 429 |
-
return func(*args, **kwargs)
|
| 430 |
-
|
| 431 |
-
# Check for CUDA hardware/driver errors (e.g. worker_init failure)
|
| 432 |
-
err_lower = str(e).lower()
|
| 433 |
-
is_cuda_error = any(p in err_lower for p in _CUDA_ERROR_PATTERNS)
|
| 434 |
-
|
| 435 |
-
# SDK wraps worker_init failures as gradio.Error(title="ZeroGPU worker error")
|
| 436 |
-
# with message = just the exception class name. Original CUDA message is lost.
|
| 437 |
-
is_sdk_worker_error = False
|
| 438 |
-
if not is_cuda_error:
|
| 439 |
-
err_title = getattr(e, 'title', '') or ''
|
| 440 |
-
is_sdk_worker_error = 'worker' in err_title.lower() and 'error' in err_title.lower()
|
| 441 |
-
|
| 442 |
-
if is_cuda_error:
|
| 443 |
-
print(f"[GPU] CUDA error, falling back to CPU: {e}")
|
| 444 |
-
_mark_cuda_unhealthy()
|
| 445 |
-
_request_state.gpu_quota_exhausted = True
|
| 446 |
-
try:
|
| 447 |
-
import gradio as gr
|
| 448 |
-
gr.Warning("GPU hardware error — switching to CPU (slower).")
|
| 449 |
-
except Exception:
|
| 450 |
-
pass
|
| 451 |
-
return func(*args, **kwargs)
|
| 452 |
-
|
| 453 |
-
if is_sdk_worker_error:
|
| 454 |
-
# worker_init failed — the assigned GPU UUID wasn't available.
|
| 455 |
-
# This is transient: retrying calls client.schedule() which
|
| 456 |
-
# assigns a different GPU. No sleep needed.
|
| 457 |
-
print(f"[GPU] SDK worker error, retrying GPU (new schedule)...")
|
| 458 |
-
try:
|
| 459 |
-
result = gpu_func(*args, **kwargs)
|
| 460 |
-
_mark_cuda_success()
|
| 461 |
-
_reset_sdk_worker_failures()
|
| 462 |
-
return result
|
| 463 |
-
except Exception as retry_e:
|
| 464 |
-
print(f"[GPU] GPU retry also failed: {retry_e}")
|
| 465 |
-
_record_sdk_worker_failure()
|
| 466 |
-
# Both attempts failed — fall through to CPU
|
| 467 |
-
_request_state.gpu_quota_exhausted = True
|
| 468 |
-
try:
|
| 469 |
-
import gradio as gr
|
| 470 |
-
gr.Warning("GPU temporarily unavailable — using CPU (slower).")
|
| 471 |
except Exception:
|
| 472 |
pass
|
| 473 |
return func(*args, **kwargs)
|
| 474 |
|
|
|
|
| 475 |
is_timeout = (
|
| 476 |
'timeout' in err_lower
|
| 477 |
or 'duration' in err_lower
|
|
@@ -481,19 +252,11 @@ def gpu_with_fallback(duration=60):
|
|
| 481 |
print(f"[GPU] Timeout error in {func.__name__}: {e}")
|
| 482 |
raise
|
| 483 |
|
| 484 |
-
#
|
| 485 |
-
#
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
print(f"[GPU] Unrecognized GPU error, falling back to CPU: {type(e).__name__}: {e}")
|
| 490 |
-
_request_state.gpu_quota_exhausted = True
|
| 491 |
-
try:
|
| 492 |
-
import gradio as gr
|
| 493 |
-
gr.Warning("GPU error — using CPU (slower).")
|
| 494 |
-
except Exception:
|
| 495 |
-
pass
|
| 496 |
-
return func(*args, **kwargs)
|
| 497 |
|
| 498 |
return wrapper
|
| 499 |
return decorator
|
|
|
|
| 5 |
|
| 6 |
import os
|
| 7 |
import re
|
|
|
|
| 8 |
import threading
|
| 9 |
from typing import Callable, TypeVar
|
| 10 |
from functools import wraps
|
|
|
|
| 17 |
# Per-thread (per-request) GPU state so concurrent requests don't interfere
|
| 18 |
_request_state = threading.local()
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
# ---------------------------------------------------------------------------
|
| 21 |
# Shared RLock for model device transitions AND inference.
|
| 22 |
# RLock because ensure_models_on_gpu() -> move_phoneme_asr_to_gpu() is a
|
|
|
|
| 34 |
_models_stale = False # Set True at lease end; drained at next lease start
|
| 35 |
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
try:
|
| 38 |
import spaces # type: ignore
|
| 39 |
|
|
|
|
| 73 |
_active_gpu_leases = max(0, _active_gpu_leases - 1)
|
| 74 |
|
| 75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
# =========================================================================
|
| 77 |
# Per-thread state helpers
|
| 78 |
# =========================================================================
|
|
|
|
| 109 |
_request_state.user_forced_cpu = True
|
| 110 |
|
| 111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
# =========================================================================
|
| 113 |
# Model cleanup helpers
|
| 114 |
# =========================================================================
|
|
|
|
| 165 |
The model_device_lock is held for the ENTIRE GPU lease (inference +
|
| 166 |
cleanup) to prevent concurrent threads from moving models mid-inference.
|
| 167 |
|
| 168 |
+
Error handling strategy:
|
| 169 |
+
- Quota exhaustion → CPU fallback (per-user, not process issue)
|
| 170 |
+
- Timeout → propagate to caller
|
| 171 |
+
- Any other GPU error → os._exit(1) for clean process restart
|
| 172 |
+
|
| 173 |
Usage:
|
| 174 |
@gpu_with_fallback(duration=60)
|
| 175 |
def my_gpu_func(data):
|
|
|
|
| 186 |
with model_device_lock:
|
| 187 |
try:
|
| 188 |
_drain_stale_models()
|
| 189 |
+
return func(*args, **kwargs)
|
|
|
|
|
|
|
|
|
|
| 190 |
finally:
|
| 191 |
try:
|
| 192 |
_cleanup_after_gpu()
|
|
|
|
| 215 |
print("[GPU] Quota exhausted, using CPU fallback")
|
| 216 |
return func(*args, **kwargs)
|
| 217 |
|
| 218 |
+
# Try GPU
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
try:
|
| 220 |
return gpu_func(*args, **kwargs)
|
| 221 |
except Exception as e:
|
| 222 |
+
err_str = str(e)
|
| 223 |
+
err_lower = err_str.lower()
|
| 224 |
+
|
| 225 |
+
# Quota exhaustion → CPU fallback (per-user, not process issue)
|
| 226 |
is_quota_error = getattr(e, 'title', '') == "ZeroGPU quota exceeded"
|
| 227 |
if not is_quota_error:
|
| 228 |
+
is_quota_error = 'quota' in err_lower and ('exceeded' in err_lower or 'exhausted' in err_lower)
|
|
|
|
| 229 |
|
| 230 |
if is_quota_error:
|
| 231 |
print(f"[GPU] Quota exceeded (user-level), falling back to CPU: {e}")
|
| 232 |
_request_state.gpu_quota_exhausted = True
|
| 233 |
+
match = re.search(r'Try again in (\d+:\d{2}:\d{2})', err_str)
|
|
|
|
| 234 |
if match:
|
| 235 |
_request_state.quota_reset_time = match.group(1)
|
|
|
|
|
|
|
|
|
|
| 236 |
try:
|
| 237 |
import gradio as gr
|
| 238 |
reset_time = get_quota_reset_time()
|
| 239 |
reset_msg = f" Resets in {reset_time}." if reset_time else ""
|
| 240 |
gr.Warning(f"GPU quota reached — switching to CPU (slower).{reset_msg}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
except Exception:
|
| 242 |
pass
|
| 243 |
return func(*args, **kwargs)
|
| 244 |
|
| 245 |
+
# Timeout → propagate to caller
|
| 246 |
is_timeout = (
|
| 247 |
'timeout' in err_lower
|
| 248 |
or 'duration' in err_lower
|
|
|
|
| 252 |
print(f"[GPU] Timeout error in {func.__name__}: {e}")
|
| 253 |
raise
|
| 254 |
|
| 255 |
+
# ANY other GPU error → process is poisoned, kill immediately.
|
| 256 |
+
# HF Spaces auto-restarts the container with a fresh process.
|
| 257 |
+
print(f"[GPU] Fatal GPU error: {type(e).__name__}: {e}")
|
| 258 |
+
print("[GPU] Restarting process to recover clean GPU state...")
|
| 259 |
+
os._exit(1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
|
| 261 |
return wrapper
|
| 262 |
return decorator
|
src/pipeline.py
CHANGED
|
@@ -174,8 +174,12 @@ def _run_post_vad_pipeline(
|
|
| 174 |
|
| 175 |
# If segments were split (combined Isti'adha+Basmala), pad phoneme_texts
|
| 176 |
# with empty placeholders so indices stay aligned.
|
|
|
|
|
|
|
|
|
|
| 177 |
if len(vad_segments) != len(phoneme_texts):
|
| 178 |
-
|
|
|
|
| 179 |
|
| 180 |
# Anchor detection via phoneme n-gram voting
|
| 181 |
progress(*progress_steps["anchor"])
|
|
@@ -206,7 +210,7 @@ def _run_post_vad_pipeline(
|
|
| 206 |
|
| 207 |
# Phoneme-based DP alignment
|
| 208 |
match_start = time.time()
|
| 209 |
-
match_results, match_profiling, gap_segments = run_phoneme_matching(
|
| 210 |
phoneme_texts,
|
| 211 |
surah,
|
| 212 |
first_quran_idx,
|
|
@@ -237,6 +241,7 @@ def _run_post_vad_pipeline(
|
|
| 237 |
profiling.tier2_segments = match_profiling.get("tier2_segments", [])
|
| 238 |
profiling.consec_reanchors = match_profiling.get("consec_reanchors", 0)
|
| 239 |
profiling.special_merges = match_profiling.get("special_merges", 0)
|
|
|
|
| 240 |
profiling.segments_attempted = match_profiling.get("segments_attempted", 0)
|
| 241 |
profiling.segments_passed = match_profiling.get("segments_passed", 0)
|
| 242 |
|
|
@@ -268,9 +273,19 @@ def _run_post_vad_pipeline(
|
|
| 268 |
_underseg_by_words: list[int] = []
|
| 269 |
_underseg_by_ayah: list[int] = []
|
| 270 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
for idx, (seg, (matched_text, score, matched_ref)) in enumerate(
|
| 272 |
zip(vad_segments, match_results)
|
| 273 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
if idx == last_display_idx and matched_ref:
|
| 275 |
if not is_end_of_verse(matched_ref):
|
| 276 |
score = max(0.0, score - 0.25)
|
|
@@ -283,13 +298,15 @@ def _run_post_vad_pipeline(
|
|
| 283 |
matched_ref = ""
|
| 284 |
error = f"Low confidence ({score:.0%})"
|
| 285 |
|
| 286 |
-
|
|
|
|
|
|
|
| 287 |
word_count, ayah_span = get_segment_word_stats(matched_ref)
|
| 288 |
underseg = check_undersegmented(matched_ref, duration)
|
| 289 |
|
| 290 |
segments.append(SegmentInfo(
|
| 291 |
start_time=seg.start_time,
|
| 292 |
-
end_time=
|
| 293 |
transcribed_text=phoneme_text,
|
| 294 |
matched_text=matched_text,
|
| 295 |
matched_ref=matched_ref,
|
|
|
|
| 174 |
|
| 175 |
# If segments were split (combined Isti'adha+Basmala), pad phoneme_texts
|
| 176 |
# with empty placeholders so indices stay aligned.
|
| 177 |
+
# The split replaces one segment with two, so vad_segments is 1 longer.
|
| 178 |
+
# Insert an empty placeholder at the split position (= first_quran_idx - 2
|
| 179 |
+
# is where the combined segment was, but simpler: find the gap).
|
| 180 |
if len(vad_segments) != len(phoneme_texts):
|
| 181 |
+
split_idx = first_quran_idx - 2 # Combined was split into 2 entries starting here
|
| 182 |
+
phoneme_texts = phoneme_texts[:split_idx] + [[], []] + phoneme_texts[split_idx + 1:]
|
| 183 |
|
| 184 |
# Anchor detection via phoneme n-gram voting
|
| 185 |
progress(*progress_steps["anchor"])
|
|
|
|
| 210 |
|
| 211 |
# Phoneme-based DP alignment
|
| 212 |
match_start = time.time()
|
| 213 |
+
match_results, match_profiling, gap_segments, merged_into = run_phoneme_matching(
|
| 214 |
phoneme_texts,
|
| 215 |
surah,
|
| 216 |
first_quran_idx,
|
|
|
|
| 241 |
profiling.tier2_segments = match_profiling.get("tier2_segments", [])
|
| 242 |
profiling.consec_reanchors = match_profiling.get("consec_reanchors", 0)
|
| 243 |
profiling.special_merges = match_profiling.get("special_merges", 0)
|
| 244 |
+
profiling.transition_skips = match_profiling.get("transition_skips", 0)
|
| 245 |
profiling.segments_attempted = match_profiling.get("segments_attempted", 0)
|
| 246 |
profiling.segments_passed = match_profiling.get("segments_passed", 0)
|
| 247 |
|
|
|
|
| 273 |
_underseg_by_words: list[int] = []
|
| 274 |
_underseg_by_ayah: list[int] = []
|
| 275 |
|
| 276 |
+
# Pre-compute merged end times: extend target segment's end_time
|
| 277 |
+
_merged_end_times = {} # {target_idx: extended_end_time}
|
| 278 |
+
for consumed_idx, target_idx in merged_into.items():
|
| 279 |
+
if consumed_idx < len(vad_segments):
|
| 280 |
+
_merged_end_times[target_idx] = vad_segments[consumed_idx].end_time
|
| 281 |
+
|
| 282 |
for idx, (seg, (matched_text, score, matched_ref)) in enumerate(
|
| 283 |
zip(vad_segments, match_results)
|
| 284 |
):
|
| 285 |
+
# Skip segments consumed by Tahmeed merge
|
| 286 |
+
if idx in merged_into:
|
| 287 |
+
continue
|
| 288 |
+
|
| 289 |
if idx == last_display_idx and matched_ref:
|
| 290 |
if not is_end_of_verse(matched_ref):
|
| 291 |
score = max(0.0, score - 0.25)
|
|
|
|
| 298 |
matched_ref = ""
|
| 299 |
error = f"Low confidence ({score:.0%})"
|
| 300 |
|
| 301 |
+
# Extend end_time if this segment absorbed a merged segment
|
| 302 |
+
seg_end_time = _merged_end_times.get(idx, seg.end_time)
|
| 303 |
+
duration = seg_end_time - seg.start_time
|
| 304 |
word_count, ayah_span = get_segment_word_stats(matched_ref)
|
| 305 |
underseg = check_undersegmented(matched_ref, duration)
|
| 306 |
|
| 307 |
segments.append(SegmentInfo(
|
| 308 |
start_time=seg.start_time,
|
| 309 |
+
end_time=seg_end_time,
|
| 310 |
transcribed_text=phoneme_text,
|
| 311 |
matched_text=matched_text,
|
| 312 |
matched_ref=matched_ref,
|
src/ui/segments.py
CHANGED
|
@@ -16,6 +16,7 @@ from config import (
|
|
| 16 |
SURAH_INFO_PATH,
|
| 17 |
)
|
| 18 |
from src.core.segment_types import SegmentInfo
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
def format_timestamp(seconds: float) -> str:
|
|
@@ -237,11 +238,14 @@ def render_segment_card(seg: SegmentInfo, idx: int, audio_int16: np.ndarray = No
|
|
| 237 |
render_key: Unique key to prevent browser caching between renders
|
| 238 |
segment_dir: Directory to write segment WAV files into
|
| 239 |
"""
|
|
|
|
| 240 |
confidence_class = get_confidence_class(seg.match_score)
|
| 241 |
confidence_badge_class = confidence_class # preserve original for badge color
|
| 242 |
-
if
|
|
|
|
|
|
|
| 243 |
confidence_class = "segment-low"
|
| 244 |
-
|
| 245 |
confidence_class = "segment-underseg"
|
| 246 |
|
| 247 |
timestamp = f"{format_timestamp(seg.start_time)} - {format_timestamp(seg.end_time)}"
|
|
@@ -330,7 +334,12 @@ def render_segment_card(seg: SegmentInfo, idx: int, audio_int16: np.ndarray = No
|
|
| 330 |
else:
|
| 331 |
text_html = ""
|
| 332 |
|
| 333 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
|
| 335 |
# Build inline header: Segment N | ref | duration | time range
|
| 336 |
header_parts = [f"Segment {idx + 1}"]
|
|
@@ -389,9 +398,11 @@ def render_segments(segments: list, audio_int16: np.ndarray = None, sample_rate:
|
|
| 389 |
wf.writeframes(audio_int16.tobytes())
|
| 390 |
full_audio_url = f"/gradio_api/file={full_path}"
|
| 391 |
|
| 392 |
-
# Categorize segments by confidence level (1-indexed for display)
|
| 393 |
-
med_segments = [i + 1 for i, s in enumerate(segments)
|
| 394 |
-
|
|
|
|
|
|
|
| 395 |
|
| 396 |
# Build header with confidence summary
|
| 397 |
header_parts = []
|
|
|
|
| 16 |
SURAH_INFO_PATH,
|
| 17 |
)
|
| 18 |
from src.core.segment_types import SegmentInfo
|
| 19 |
+
from src.alignment.special_segments import ALL_SPECIAL_REFS
|
| 20 |
|
| 21 |
|
| 22 |
def format_timestamp(seconds: float) -> str:
|
|
|
|
| 238 |
render_key: Unique key to prevent browser caching between renders
|
| 239 |
segment_dir: Directory to write segment WAV files into
|
| 240 |
"""
|
| 241 |
+
is_special = seg.matched_ref in ALL_SPECIAL_REFS
|
| 242 |
confidence_class = get_confidence_class(seg.match_score)
|
| 243 |
confidence_badge_class = confidence_class # preserve original for badge color
|
| 244 |
+
if is_special:
|
| 245 |
+
confidence_class = "segment-special"
|
| 246 |
+
elif seg.has_missing_words:
|
| 247 |
confidence_class = "segment-low"
|
| 248 |
+
elif seg.potentially_undersegmented and confidence_class != "segment-low":
|
| 249 |
confidence_class = "segment-underseg"
|
| 250 |
|
| 251 |
timestamp = f"{format_timestamp(seg.start_time)} - {format_timestamp(seg.end_time)}"
|
|
|
|
| 334 |
else:
|
| 335 |
text_html = ""
|
| 336 |
|
| 337 |
+
if is_special:
|
| 338 |
+
confidence_badge = f'<div class="segment-badge segment-special-badge">{seg.matched_ref}</div>'
|
| 339 |
+
elif seg.has_missing_words:
|
| 340 |
+
confidence_badge = ""
|
| 341 |
+
else:
|
| 342 |
+
confidence_badge = f'<div class="segment-badge {confidence_badge_class}-badge">{confidence_pct}</div>'
|
| 343 |
|
| 344 |
# Build inline header: Segment N | ref | duration | time range
|
| 345 |
header_parts = [f"Segment {idx + 1}"]
|
|
|
|
| 398 |
wf.writeframes(audio_int16.tobytes())
|
| 399 |
full_audio_url = f"/gradio_api/file={full_path}"
|
| 400 |
|
| 401 |
+
# Categorize segments by confidence level (1-indexed for display), excluding specials
|
| 402 |
+
med_segments = [i + 1 for i, s in enumerate(segments)
|
| 403 |
+
if CONFIDENCE_MED <= s.match_score < CONFIDENCE_HIGH and s.matched_ref not in ALL_SPECIAL_REFS]
|
| 404 |
+
low_segments = [i + 1 for i, s in enumerate(segments)
|
| 405 |
+
if s.match_score < CONFIDENCE_MED and s.matched_ref not in ALL_SPECIAL_REFS]
|
| 406 |
|
| 407 |
# Build header with confidence summary
|
| 408 |
header_parts = []
|
src/ui/styles.py
CHANGED
|
@@ -400,6 +400,8 @@ def build_css() -> str:
|
|
| 400 |
.segment-low-badge {{ background: #dc3545; }}
|
| 401 |
.segment-underseg {{ background: #ffe5cc; border-color: #ff8c00; }}
|
| 402 |
.segment-underseg-badge {{ background: #ff8c00; }}
|
|
|
|
|
|
|
| 403 |
|
| 404 |
/* Review summary text colors */
|
| 405 |
.segments-review-summary {{ margin-bottom: 8px; font-size: 14px; }}
|
|
@@ -421,11 +423,13 @@ def build_css() -> str:
|
|
| 421 |
.segment-med {{ background: rgba(255, 193, 7, 0.2); border-color: #ffc107; }}
|
| 422 |
.segment-low {{ background: rgba(220, 53, 69, 0.2); border-color: #dc3545; }}
|
| 423 |
.segment-underseg {{ background: rgba(255, 140, 0, 0.2); border-color: #ff8c00; }}
|
|
|
|
| 424 |
}}
|
| 425 |
/* Also support Gradio's dark class */
|
| 426 |
.dark .segment-high {{ background: rgba(40, 167, 69, 0.2); border-color: #28a745; }}
|
| 427 |
.dark .segment-med {{ background: rgba(255, 193, 7, 0.2); border-color: #ffc107; }}
|
| 428 |
.dark .segment-low {{ background: rgba(220, 53, 69, 0.2); border-color: #dc3545; }}
|
| 429 |
.dark .segment-underseg {{ background: rgba(255, 140, 0, 0.2); border-color: #ff8c00; }}
|
|
|
|
| 430 |
|
| 431 |
"""
|
|
|
|
| 400 |
.segment-low-badge {{ background: #dc3545; }}
|
| 401 |
.segment-underseg {{ background: #ffe5cc; border-color: #ff8c00; }}
|
| 402 |
.segment-underseg-badge {{ background: #ff8c00; }}
|
| 403 |
+
.segment-special {{ background: #e8eaf6; border-color: #5c6bc0; border-style: dashed; }}
|
| 404 |
+
.segment-special-badge {{ background: #5c6bc0; }}
|
| 405 |
|
| 406 |
/* Review summary text colors */
|
| 407 |
.segments-review-summary {{ margin-bottom: 8px; font-size: 14px; }}
|
|
|
|
| 423 |
.segment-med {{ background: rgba(255, 193, 7, 0.2); border-color: #ffc107; }}
|
| 424 |
.segment-low {{ background: rgba(220, 53, 69, 0.2); border-color: #dc3545; }}
|
| 425 |
.segment-underseg {{ background: rgba(255, 140, 0, 0.2); border-color: #ff8c00; }}
|
| 426 |
+
.segment-special {{ background: rgba(92, 107, 192, 0.2); border-color: #5c6bc0; border-style: dashed; }}
|
| 427 |
}}
|
| 428 |
/* Also support Gradio's dark class */
|
| 429 |
.dark .segment-high {{ background: rgba(40, 167, 69, 0.2); border-color: #28a745; }}
|
| 430 |
.dark .segment-med {{ background: rgba(255, 193, 7, 0.2); border-color: #ffc107; }}
|
| 431 |
.dark .segment-low {{ background: rgba(220, 53, 69, 0.2); border-color: #dc3545; }}
|
| 432 |
.dark .segment-underseg {{ background: rgba(255, 140, 0, 0.2); border-color: #ff8c00; }}
|
| 433 |
+
.dark .segment-special {{ background: rgba(92, 107, 192, 0.2); border-color: #5c6bc0; border-style: dashed; }}
|
| 434 |
|
| 435 |
"""
|