hetchyy Claude Opus 4.6 committed on
Commit
7f362a6
·
1 Parent(s): aadab30

Add transition segment detection for non-Quranic phrases (Amin, Takbir, Tahmeed)

Browse files

Detect and render Amin (after Al-Fatiha), Takbir (Allahu Akbar), and
Tahmeed (Sami'a Allahu liman hamidah) as special segment cards with
distinct styling. Includes Takbir-at-start detection before Isti'adha/
Basmala, transition mode state machine for consecutive non-Quranic
segments, Tahmeed peek-ahead merge for split sami'a+rabbana segments,
and unified special segment rendering with name badges.

Also simplify GPU error handling: replace per-error-type fallback with
immediate process restart for non-quota GPU errors.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

config.py CHANGED
@@ -133,6 +133,7 @@ LOOKBACK_WORDS = 30 # Window words to look back from pointer for
133
  LOOKAHEAD_WORDS = 10 # Window words to look ahead after expected end position
134
  MAX_EDIT_DISTANCE = 0.25 # Max normalized edit distance for valid ayah match
135
  MAX_SPECIAL_EDIT_DISTANCE = 0.35 # Max normalized edit distance for Basmala/Isti'adha detection
 
136
  START_PRIOR_WEIGHT = 0.005 # Penalty per word away from expected position
137
 
138
  # Failed Segments
@@ -234,17 +235,6 @@ PROGRESS_RETRANSCRIBE = {
234
 
235
  MFA_PROGRESS_SEGMENT_RATE = 0.05 # seconds per segment for progress bar animation
236
 
237
- # =============================================================================
238
- # CUDA health monitoring (ZeroGPU poisoning protection)
239
- # =============================================================================
240
-
241
- CUDA_COOLDOWN_SECONDS = 30 # Seconds before retrying GPU after CUDA error
242
- MAX_CUDA_FAILURES = 5 # Consecutive CUDA failures before process restart
243
-
244
- # SDK worker scheduling errors (transient — different from CUDA hardware errors)
245
- SDK_WORKER_COOLDOWN_SECONDS = 15 # Very short — scheduler issues resolve fast
246
- SDK_WORKER_FAILURE_THRESHOLD = 2 # After 2 consecutive failed retries → cooldown
247
-
248
  # =============================================================================
249
  # UI settings
250
  # =============================================================================
 
133
  LOOKAHEAD_WORDS = 10 # Window words to look ahead after expected end position
134
  MAX_EDIT_DISTANCE = 0.25 # Max normalized edit distance for valid ayah match
135
  MAX_SPECIAL_EDIT_DISTANCE = 0.35 # Max normalized edit distance for Basmala/Isti'adha detection
136
+ MAX_TRANSITION_EDIT_DISTANCE = 0.35 # Max normalized edit distance for transition segments (Amin/Takbir/Tahmeed)
137
  START_PRIOR_WEIGHT = 0.005 # Penalty per word away from expected position
138
 
139
  # Failed Segments
 
235
 
236
  MFA_PROGRESS_SEGMENT_RATE = 0.05 # seconds per segment for progress bar animation
237
 
 
 
 
 
 
 
 
 
 
 
 
238
  # =============================================================================
239
  # UI settings
240
  # =============================================================================
src/alignment/alignment_pipeline.py CHANGED
@@ -18,7 +18,7 @@ def run_phoneme_matching(
18
  first_quran_idx: int = 0,
19
  special_results: List[tuple] = None,
20
  start_pointer: int = 0,
21
- ) -> Tuple[List[tuple], dict, set]:
22
  """
23
  Phoneme-based segment matching using substring DP.
24
 
@@ -30,8 +30,9 @@ def run_phoneme_matching(
30
  start_pointer: Initial word pointer from anchor voting
31
 
32
  Returns:
33
- (results, profiling_dict, gap_segments)
34
  results: List[(matched_text, score, matched_ref), ...]
 
35
  """
36
  from .phoneme_matcher import align_segment, get_matched_text
37
  from .phoneme_matcher_cache import get_chapter_reference
@@ -62,7 +63,10 @@ def run_phoneme_matching(
62
  result_build_total = 0.0
63
 
64
  # Track whether the next segment might have Basmala fused with verse content
65
- from .special_segments import SPECIAL_PHONEMES, SPECIAL_TEXT
 
 
 
66
  basmala_already_detected = any(
67
  r[2] in ("Basmala", "Isti'adha+Basmala") for r in (special_results or [])
68
  )
@@ -70,6 +74,12 @@ def run_phoneme_matching(
70
 
71
  special_merges = 0
72
 
 
 
 
 
 
 
73
  # Gap tracking (initialized here so inline chapter-transition checks can add entries)
74
  gap_segments = set()
75
  transition_expected_pointer = -1 # -1 = no pending check
@@ -111,9 +121,60 @@ def run_phoneme_matching(
111
  skip_count -= 1
112
  continue
113
 
 
 
 
 
 
 
 
 
 
114
  segment_idx = first_quran_idx + i + 1 # 1-indexed for display
115
  segments_attempted += 1
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  alignment, timing = align_segment(asr_phonemes, chapter_ref, pointer, segment_idx)
118
  num_segments += 1
119
 
@@ -125,8 +186,22 @@ def run_phoneme_matching(
125
 
126
  # Chapter transition: pointer past end of chapter
127
  if alignment is None and pointer >= chapter_ref.num_words:
128
- from .special_segments import detect_inter_chapter_specials
129
  remaining_phonemes = phoneme_texts[first_quran_idx + i:]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  inter_specials, num_consumed = detect_inter_chapter_specials(remaining_phonemes)
131
 
132
  if chapter_ref.surah == 1:
@@ -134,8 +209,8 @@ def run_phoneme_matching(
134
  print(f" [CHAPTER-END] Surah 1 complete at segment {segment_idx}, "
135
  f"running global reanchor...")
136
 
137
- # Use segments after specials for anchor voting
138
- anchor_offset = first_quran_idx + i + num_consumed
139
  anchor_remaining = phoneme_texts[anchor_offset:]
140
 
141
  reanchor_surah, reanchor_ayah = find_anchor_by_voting(
@@ -171,6 +246,18 @@ def run_phoneme_matching(
171
  detected_surah = next_surah
172
  consecutive_failures = 0
173
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  if num_consumed > 0:
175
  has_basmala = any(s[2] in ("Basmala", "Isti'adha+Basmala") for s in inter_specials)
176
  is_first_after_transition = not has_basmala
@@ -237,6 +324,29 @@ def run_phoneme_matching(
237
  _check_transition_gap(alignment.start_word_idx)
238
  segments_passed += 1
239
  else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  # === Graduated retry ===
241
  # Tier 1: expanded window, same threshold
242
  tier1_attempts += 1
@@ -381,6 +491,7 @@ def run_phoneme_matching(
381
  "segments_attempted": segments_attempted,
382
  "segments_passed": segments_passed,
383
  "special_merges": special_merges,
 
384
  }
385
  else:
386
  profiling = {
@@ -395,6 +506,7 @@ def run_phoneme_matching(
395
  "segments_attempted": segments_attempted,
396
  "segments_passed": segments_passed,
397
  "special_merges": special_merges,
 
398
  }
399
 
400
- return results, profiling, gap_segments
 
18
  first_quran_idx: int = 0,
19
  special_results: List[tuple] = None,
20
  start_pointer: int = 0,
21
+ ) -> Tuple[List[tuple], dict, set, dict]:
22
  """
23
  Phoneme-based segment matching using substring DP.
24
 
 
30
  start_pointer: Initial word pointer from anchor voting
31
 
32
  Returns:
33
+ (results, profiling_dict, gap_segments, merged_into)
34
  results: List[(matched_text, score, matched_ref), ...]
35
+ merged_into: dict mapping consumed segment indices to their target segment index
36
  """
37
  from .phoneme_matcher import align_segment, get_matched_text
38
  from .phoneme_matcher_cache import get_chapter_reference
 
63
  result_build_total = 0.0
64
 
65
  # Track whether the next segment might have Basmala fused with verse content
66
+ from .special_segments import (
67
+ SPECIAL_PHONEMES, SPECIAL_TEXT, TRANSITION_TEXT,
68
+ detect_transition_segment, detect_inter_chapter_specials,
69
+ )
70
  basmala_already_detected = any(
71
  r[2] in ("Basmala", "Isti'adha+Basmala") for r in (special_results or [])
72
  )
 
74
 
75
  special_merges = 0
76
 
77
+ # Transition segment state
78
+ transition_mode = False
79
+ transition_skips = 0
80
+ tahmeed_merge_skip = 0
81
+ merged_into = {} # {consumed_idx: target_idx}
82
+
83
  # Gap tracking (initialized here so inline chapter-transition checks can add entries)
84
  gap_segments = set()
85
  transition_expected_pointer = -1 # -1 = no pending check
 
121
  skip_count -= 1
122
  continue
123
 
124
+ # Handle segments consumed by Tahmeed merge (sami'a + rabbana in separate segments)
125
+ if tahmeed_merge_skip > 0:
126
+ # This segment's audio was merged into the previous Tahmeed segment
127
+ results.append(("", 0.0, ""))
128
+ word_indices.append(None)
129
+ tahmeed_merge_skip -= 1
130
+ transition_skips += 1
131
+ continue
132
+
133
  segment_idx = first_quran_idx + i + 1 # 1-indexed for display
134
  segments_attempted += 1
135
 
136
+ # Transition mode: keep checking for transitions before trying alignment
137
+ if transition_mode:
138
+ trans_name, trans_conf = detect_transition_segment(asr_phonemes)
139
+ if trans_name:
140
+ print(f" [TRANSITION-MODE] Segment {segment_idx}: {trans_name} (conf={trans_conf:.2f})")
141
+ results.append((TRANSITION_TEXT[trans_name], trans_conf, trans_name))
142
+ word_indices.append(None)
143
+ transition_skips += 1
144
+
145
+ # Tahmeed peek-ahead for merge
146
+ if trans_name == "Tahmeed":
147
+ next_abs = first_quran_idx + i + 1
148
+ if next_abs < len(phoneme_texts) and phoneme_texts[next_abs]:
149
+ resp_name, resp_conf = detect_transition_segment(
150
+ phoneme_texts[next_abs], allowed={"Tahmeed"})
151
+ if resp_name:
152
+ merged_into[next_abs] = first_quran_idx + i
153
+ tahmeed_merge_skip = 1
154
+ print(f" [TAHMEED-MERGE] Next segment merged into Tahmeed")
155
+
156
+ continue
157
+ else:
158
+ # Exit transition mode, global reanchor
159
+ transition_mode = False
160
+ print(f" [TRANSITION-MODE] Exiting at segment {segment_idx}, running global reanchor...")
161
+ remaining_idx = first_quran_idx + i
162
+ remaining_texts = phoneme_texts[remaining_idx:]
163
+ if remaining_texts:
164
+ reanchor_surah, reanchor_ayah = find_anchor_by_voting(
165
+ remaining_texts, get_ngram_index(), ANCHOR_SEGMENTS,
166
+ )
167
+ if reanchor_surah > 0:
168
+ if reanchor_surah != detected_surah:
169
+ detected_surah = reanchor_surah
170
+ chapter_ref = get_chapter_reference(detected_surah)
171
+ pointer = verse_to_word_index(chapter_ref, reanchor_ayah)
172
+ transition_expected_pointer = pointer
173
+ print(f" [GLOBAL-REANCHOR] Jumped to Surah {detected_surah}, "
174
+ f"Ayah {reanchor_ayah}, word {pointer}")
175
+ consecutive_failures = 0
176
+ # Fall through to normal alignment below
177
+
178
  alignment, timing = align_segment(asr_phonemes, chapter_ref, pointer, segment_idx)
179
  num_segments += 1
180
 
 
186
 
187
  # Chapter transition: pointer past end of chapter
188
  if alignment is None and pointer >= chapter_ref.num_words:
 
189
  remaining_phonemes = phoneme_texts[first_quran_idx + i:]
190
+ amin_consumed = 0
191
+
192
+ if chapter_ref.surah == 1:
193
+ # Check for Amin after Al-Fatiha before inter-chapter specials
194
+ amin_name, amin_conf = detect_transition_segment(
195
+ asr_phonemes, allowed={"Amin"})
196
+ if amin_name:
197
+ print(f" [AMIN] Detected after Surah 1 (conf={amin_conf:.2f})")
198
+ results.append((TRANSITION_TEXT["Amin"], amin_conf, "Amin"))
199
+ word_indices.append(None)
200
+ transition_skips += 1
201
+ amin_consumed = 1
202
+ # Re-slice remaining phonemes to start after Amin
203
+ remaining_phonemes = phoneme_texts[first_quran_idx + i + 1:]
204
+
205
  inter_specials, num_consumed = detect_inter_chapter_specials(remaining_phonemes)
206
 
207
  if chapter_ref.surah == 1:
 
209
  print(f" [CHAPTER-END] Surah 1 complete at segment {segment_idx}, "
210
  f"running global reanchor...")
211
 
212
+ # Use segments after Amin + specials for anchor voting
213
+ anchor_offset = first_quran_idx + i + amin_consumed + num_consumed
214
  anchor_remaining = phoneme_texts[anchor_offset:]
215
 
216
  reanchor_surah, reanchor_ayah = find_anchor_by_voting(
 
246
  detected_surah = next_surah
247
  consecutive_failures = 0
248
 
249
+ if amin_consumed > 0:
250
+ # Current segment was Amin (already appended above).
251
+ # Queue inter-chapter specials for subsequent segments.
252
+ has_basmala = any(s[2] in ("Basmala", "Isti'adha+Basmala") for s in inter_specials)
253
+ is_first_after_transition = not has_basmala
254
+ if num_consumed > 0:
255
+ pending_specials = list(inter_specials)
256
+ skip_count = num_consumed
257
+ else:
258
+ is_first_after_transition = True
259
+ continue
260
+
261
  if num_consumed > 0:
262
  has_basmala = any(s[2] in ("Basmala", "Isti'adha+Basmala") for s in inter_specials)
263
  is_first_after_transition = not has_basmala
 
324
  _check_transition_gap(alignment.start_word_idx)
325
  segments_passed += 1
326
  else:
327
+ # === Check for transition segment before retry tiers ===
328
+ trans_name, trans_conf = detect_transition_segment(asr_phonemes)
329
+ if trans_name:
330
+ print(f" [TRANSITION] Segment {segment_idx}: {trans_name} (conf={trans_conf:.2f})")
331
+ result = (TRANSITION_TEXT[trans_name], trans_conf, trans_name)
332
+ word_indices.append(None)
333
+ transition_skips += 1
334
+ transition_mode = True
335
+
336
+ # Tahmeed peek-ahead for merge
337
+ if trans_name == "Tahmeed":
338
+ next_abs = first_quran_idx + i + 1
339
+ if next_abs < len(phoneme_texts) and phoneme_texts[next_abs]:
340
+ resp_name, resp_conf = detect_transition_segment(
341
+ phoneme_texts[next_abs], allowed={"Tahmeed"})
342
+ if resp_name:
343
+ merged_into[next_abs] = first_quran_idx + i
344
+ tahmeed_merge_skip = 1
345
+ print(f" [TAHMEED-MERGE] Next segment merged into Tahmeed")
346
+
347
+ results.append(result)
348
+ continue
349
+
350
  # === Graduated retry ===
351
  # Tier 1: expanded window, same threshold
352
  tier1_attempts += 1
 
491
  "segments_attempted": segments_attempted,
492
  "segments_passed": segments_passed,
493
  "special_merges": special_merges,
494
+ "transition_skips": transition_skips,
495
  }
496
  else:
497
  profiling = {
 
506
  "segments_attempted": segments_attempted,
507
  "segments_passed": segments_passed,
508
  "special_merges": special_merges,
509
+ "transition_skips": transition_skips,
510
  }
511
 
512
+ return results, profiling, gap_segments, merged_into
src/alignment/special_segments.py CHANGED
@@ -16,7 +16,7 @@ from typing import List, Tuple, Optional
16
  # Constants
17
  # =============================================================================
18
 
19
- from config import MAX_SPECIAL_EDIT_DISTANCE
20
 
21
  # Special phoneme sequences
22
  SPECIAL_PHONEMES = {
@@ -40,6 +40,66 @@ SPECIAL_TEXT = {
40
  "Basmala": "بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيم",
41
  }
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  # =============================================================================
45
  # Levenshtein Distance
@@ -141,8 +201,25 @@ def detect_special_segments(
141
 
142
  special_results: List[Tuple[str, float, str]] = []
143
 
144
- # Segment 0 phonemes (already a list)
 
 
 
145
  seg0_phonemes = phoneme_texts[0] if phoneme_texts[0] else []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
  # ==========================================================================
148
  # 1. Try COMBINED (Isti'adha + Basmala in one segment)
@@ -152,82 +229,88 @@ def detect_special_segments(
152
  if combined_dist <= MAX_SPECIAL_EDIT_DISTANCE:
153
  print(f"[SPECIAL] Combined Isti'adha+Basmala detected (dist={combined_dist:.2f})")
154
 
155
- # Split segment 0 by midpoint
156
- seg = vad_segments[0]
157
- audio = segment_audios[0]
158
  mid_time = (seg.start_time + seg.end_time) / 2.0
159
  mid_sample = max(1, len(audio) // 2)
160
 
161
- # Create two new segments
162
- new_vads = [
163
- VadSegment(start_time=seg.start_time, end_time=mid_time, segment_idx=0),
164
- VadSegment(start_time=mid_time, end_time=seg.end_time, segment_idx=1),
165
- ]
166
- new_audios = [
167
- audio[:mid_sample],
168
- audio[mid_sample:],
169
- ]
170
 
171
  # Add remaining segments with reindexed segment_idx
172
- for i, vs in enumerate(vad_segments[1:], start=2):
173
  new_vads.append(VadSegment(
174
  start_time=vs.start_time,
175
  end_time=vs.end_time,
176
- segment_idx=i
177
  ))
178
- new_audios.extend(segment_audios[1:])
179
 
180
  # Special results for both (confidence = 1 - distance)
181
  confidence = 1.0 - combined_dist
182
- special_results = [
183
  (SPECIAL_TEXT["Isti'adha"], confidence, "Isti'adha"),
184
  (SPECIAL_TEXT["Basmala"], confidence, "Basmala"),
185
- ]
186
 
187
- return new_vads, new_audios, special_results, 2
188
 
189
  # ==========================================================================
190
- # 2. Try Isti'adha on segment 0
191
  # ==========================================================================
192
  istiadha_dist = phoneme_edit_distance(seg0_phonemes, SPECIAL_PHONEMES["Isti'adha"])
193
 
194
  if istiadha_dist <= MAX_SPECIAL_EDIT_DISTANCE:
195
- print(f"[SPECIAL] Isti'adha detected on segment 0 (dist={istiadha_dist:.2f})")
196
  special_results.append(
197
  (SPECIAL_TEXT["Isti'adha"], 1.0 - istiadha_dist, "Isti'adha")
198
  )
199
 
200
- # Try Basmala on segment 1
201
- if len(phoneme_texts) >= 2 and phoneme_texts[1]:
202
- seg1_phonemes = phoneme_texts[1]
 
203
  basmala_dist = phoneme_edit_distance(seg1_phonemes, SPECIAL_PHONEMES["Basmala"])
204
 
205
  if basmala_dist <= MAX_SPECIAL_EDIT_DISTANCE:
206
- print(f"[SPECIAL] Basmala detected on segment 1 (dist={basmala_dist:.2f})")
207
  special_results.append(
208
  (SPECIAL_TEXT["Basmala"], 1.0 - basmala_dist, "Basmala")
209
  )
210
- return vad_segments, segment_audios, special_results, 2
211
  else:
212
- print(f"[SPECIAL] No Basmala on segment 1 (dist={basmala_dist:.2f})")
213
 
214
- return vad_segments, segment_audios, special_results, 1
215
 
216
  # ==========================================================================
217
- # 3. Try Basmala on segment 0
218
  # ==========================================================================
219
  basmala_dist = phoneme_edit_distance(seg0_phonemes, SPECIAL_PHONEMES["Basmala"])
220
 
221
  if basmala_dist <= MAX_SPECIAL_EDIT_DISTANCE:
222
- print(f"[SPECIAL] Basmala detected on segment 0 (dist={basmala_dist:.2f})")
223
  special_results.append(
224
  (SPECIAL_TEXT["Basmala"], 1.0 - basmala_dist, "Basmala")
225
  )
226
- return vad_segments, segment_audios, special_results, 1
227
 
228
  # ==========================================================================
229
- # 4. No specials detected
230
  # ==========================================================================
 
 
 
 
 
231
  print(f"[SPECIAL] No special segments detected "
232
  f"(istiadha={istiadha_dist:.2f}, basmala={basmala_dist:.2f})")
233
 
@@ -293,3 +376,57 @@ def detect_inter_chapter_specials(
293
  print(f"[INTER-CHAPTER] No special segments detected "
294
  f"(istiadha={istiadha_dist:.2f}, basmala={basmala_dist:.2f})")
295
  return [], 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # Constants
17
  # =============================================================================
18
 
19
+ from config import MAX_SPECIAL_EDIT_DISTANCE, MAX_TRANSITION_EDIT_DISTANCE
20
 
21
  # Special phoneme sequences
22
  SPECIAL_PHONEMES = {
 
40
  "Basmala": "بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيم",
41
  }
42
 
43
+ # Transition phoneme sequences (non-Quranic phrases within recitations)
44
+ TRANSITION_PHONEMES = {
45
+ # آمِين — not in Quran, manually constructed (5 phonemes)
46
+ "Amin": ["ʔ", "a:", "m", "i:", "n"],
47
+
48
+ # اللَّهُ أَكْبَر — standalone, hamza wasl pronounced (12 phonemes)
49
+ # Heavy lam (lˤlˤ) because start of utterance → fatha context
50
+ "Takbir": [
51
+ "ʔ", "a", "lˤlˤ", "aˤ:", "h", "u",
52
+ "ʔ", "a", "k", "b", "a", "rˤ",
53
+ ],
54
+
55
+ # اللَّهُ أَكْبَر اللَّهُ أَكْبَر — repeated twice in one segment (24 phonemes)
56
+ # Some reciters say Takbir twice. Internal comparison only — display text stays single.
57
+ "Takbir_double": [
58
+ "ʔ", "a", "lˤlˤ", "aˤ:", "h", "u",
59
+ "ʔ", "a", "k", "b", "a", "rˤ",
60
+ "ʔ", "a", "lˤlˤ", "aˤ:", "h", "u",
61
+ "ʔ", "a", "k", "b", "a", "rˤ",
62
+ ],
63
+
64
+ # سَمِعَ اللَّهُ لِمَنْ حَمِدَه — waqf on final word (22 phonemes)
65
+ # Heavy lam after fatha of سَمِعَ; izhar noon before ح (throat letter)
66
+ "Tahmeed": [
67
+ "s", "a", "m", "i", "ʕ", "a",
68
+ "lˤlˤ", "aˤ:", "h", "u",
69
+ "l", "i", "m", "a", "n",
70
+ "ħ", "a", "m", "i", "d", "a", "h",
71
+ ],
72
+
73
+ # سَمِعَ اللَّهُ لِمَنْ حَمِدَهُ رَبَّنَا وَلَكَ الْحَمْد — both in one segment (39 phonemes)
74
+ # Connected speech: هُ keeps damma (no waqf mid-phrase), waqf on الحمد
75
+ "Tahmeed_combined": [
76
+ "s", "a", "m", "i", "ʕ", "a",
77
+ "lˤlˤ", "aˤ:", "h", "u",
78
+ "l", "i", "m", "a", "n",
79
+ "ħ", "a", "m", "i", "d", "a", "h", "u",
80
+ "rˤ", "aˤ", "bb", "a", "n", "a:",
81
+ "w", "a", "l", "a", "k", "a",
82
+ "l", "ħ", "a", "m", "d",
83
+ ],
84
+
85
+ # رَبَّنَا وَلَكَ الْحَمْد — response only, for merge detection (17 phonemes)
86
+ # Heavy ra with emphatic fatha; ح is lunar so lam of ال is pronounced
87
+ "Tahmeed_response": [
88
+ "rˤ", "aˤ", "bb", "a", "n", "a:",
89
+ "w", "a", "l", "a", "k", "a",
90
+ "l", "ħ", "a", "m", "d",
91
+ ],
92
+ }
93
+
94
+ TRANSITION_TEXT = {
95
+ "Amin": "آمِين",
96
+ "Takbir": "اللَّهُ أَكْبَر",
97
+ "Tahmeed": "سَمِعَ اللَّهُ لِمَنْ حَمِدَه",
98
+ }
99
+
100
+ # All special segment reference names (for unified rendering)
101
+ ALL_SPECIAL_REFS = {"Basmala", "Isti'adha", "Isti'adha+Basmala", "Amin", "Takbir", "Tahmeed"}
102
+
103
 
104
  # =============================================================================
105
  # Levenshtein Distance
 
201
 
202
  special_results: List[Tuple[str, float, str]] = []
203
 
204
+ # ==========================================================================
205
+ # 0. Check segment 0 for Takbir (recitation opener before Isti'adha/Basmala)
206
+ # ==========================================================================
207
+ takbir_offset = 0
208
  seg0_phonemes = phoneme_texts[0] if phoneme_texts[0] else []
209
+ takbir_name, takbir_conf = detect_transition_segment(seg0_phonemes, allowed={"Takbir"})
210
+ if takbir_name:
211
+ print(f"[SPECIAL] Takbir detected on segment 0 (conf={takbir_conf:.2f})")
212
+ special_results.append((TRANSITION_TEXT["Takbir"], takbir_conf, "Takbir"))
213
+ takbir_offset = 1
214
+ # Re-point to the next segment for Isti'adha/Basmala detection
215
+ if len(phoneme_texts) > 1:
216
+ seg0_phonemes = phoneme_texts[1] if phoneme_texts[1] else []
217
+ else:
218
+ return vad_segments, segment_audios, special_results, takbir_offset
219
+
220
+ # seg0_phonemes now points to the first non-Takbir segment
221
+ # (segment 0 if no Takbir, segment 1 if Takbir detected)
222
+ check_idx = takbir_offset # Index into phoneme_texts for Isti'adha/Basmala detection
223
 
224
  # ==========================================================================
225
  # 1. Try COMBINED (Isti'adha + Basmala in one segment)
 
229
  if combined_dist <= MAX_SPECIAL_EDIT_DISTANCE:
230
  print(f"[SPECIAL] Combined Isti'adha+Basmala detected (dist={combined_dist:.2f})")
231
 
232
+ # Split the combined segment by midpoint
233
+ seg = vad_segments[check_idx]
234
+ audio = segment_audios[check_idx]
235
  mid_time = (seg.start_time + seg.end_time) / 2.0
236
  mid_sample = max(1, len(audio) // 2)
237
 
238
+ # Rebuild vad/audio lists: keep segments before check_idx, split, then rest
239
+ new_vads = list(vad_segments[:check_idx])
240
+ new_audios = list(segment_audios[:check_idx])
241
+
242
+ split_start_idx = len(new_vads)
243
+ new_vads.append(VadSegment(start_time=seg.start_time, end_time=mid_time, segment_idx=split_start_idx))
244
+ new_vads.append(VadSegment(start_time=mid_time, end_time=seg.end_time, segment_idx=split_start_idx + 1))
245
+ new_audios.append(audio[:mid_sample])
246
+ new_audios.append(audio[mid_sample:])
247
 
248
  # Add remaining segments with reindexed segment_idx
249
+ for ii, vs in enumerate(vad_segments[check_idx + 1:], start=split_start_idx + 2):
250
  new_vads.append(VadSegment(
251
  start_time=vs.start_time,
252
  end_time=vs.end_time,
253
+ segment_idx=ii
254
  ))
255
+ new_audios.extend(segment_audios[check_idx + 1:])
256
 
257
  # Special results for both (confidence = 1 - distance)
258
  confidence = 1.0 - combined_dist
259
+ special_results.extend([
260
  (SPECIAL_TEXT["Isti'adha"], confidence, "Isti'adha"),
261
  (SPECIAL_TEXT["Basmala"], confidence, "Basmala"),
262
+ ])
263
 
264
+ return new_vads, new_audios, special_results, takbir_offset + 2
265
 
266
  # ==========================================================================
267
+ # 2. Try Isti'adha on the check segment
268
  # ==========================================================================
269
  istiadha_dist = phoneme_edit_distance(seg0_phonemes, SPECIAL_PHONEMES["Isti'adha"])
270
 
271
  if istiadha_dist <= MAX_SPECIAL_EDIT_DISTANCE:
272
+ print(f"[SPECIAL] Isti'adha detected on segment {check_idx} (dist={istiadha_dist:.2f})")
273
  special_results.append(
274
  (SPECIAL_TEXT["Isti'adha"], 1.0 - istiadha_dist, "Isti'adha")
275
  )
276
 
277
+ # Try Basmala on the next segment
278
+ next_idx = check_idx + 1
279
+ if next_idx < len(phoneme_texts) and phoneme_texts[next_idx]:
280
+ seg1_phonemes = phoneme_texts[next_idx]
281
  basmala_dist = phoneme_edit_distance(seg1_phonemes, SPECIAL_PHONEMES["Basmala"])
282
 
283
  if basmala_dist <= MAX_SPECIAL_EDIT_DISTANCE:
284
+ print(f"[SPECIAL] Basmala detected on segment {next_idx} (dist={basmala_dist:.2f})")
285
  special_results.append(
286
  (SPECIAL_TEXT["Basmala"], 1.0 - basmala_dist, "Basmala")
287
  )
288
+ return vad_segments, segment_audios, special_results, takbir_offset + 2
289
  else:
290
+ print(f"[SPECIAL] No Basmala on segment {next_idx} (dist={basmala_dist:.2f})")
291
 
292
+ return vad_segments, segment_audios, special_results, takbir_offset + 1
293
 
294
  # ==========================================================================
295
+ # 3. Try Basmala on the check segment
296
  # ==========================================================================
297
  basmala_dist = phoneme_edit_distance(seg0_phonemes, SPECIAL_PHONEMES["Basmala"])
298
 
299
  if basmala_dist <= MAX_SPECIAL_EDIT_DISTANCE:
300
+ print(f"[SPECIAL] Basmala detected on segment {check_idx} (dist={basmala_dist:.2f})")
301
  special_results.append(
302
  (SPECIAL_TEXT["Basmala"], 1.0 - basmala_dist, "Basmala")
303
  )
304
+ return vad_segments, segment_audios, special_results, takbir_offset + 1
305
 
306
  # ==========================================================================
307
+ # 4. No specials detected (beyond Takbir if any)
308
  # ==========================================================================
309
+ if takbir_offset > 0:
310
+ print(f"[SPECIAL] Only Takbir detected, no Isti'adha/Basmala "
311
+ f"(istiadha={istiadha_dist:.2f}, basmala={basmala_dist:.2f})")
312
+ return vad_segments, segment_audios, special_results, takbir_offset
313
+
314
  print(f"[SPECIAL] No special segments detected "
315
  f"(istiadha={istiadha_dist:.2f}, basmala={basmala_dist:.2f})")
316
 
 
376
  print(f"[INTER-CHAPTER] No special segments detected "
377
  f"(istiadha={istiadha_dist:.2f}, basmala={basmala_dist:.2f})")
378
  return [], 0
379
+
380
+
381
+ # =============================================================================
382
+ # Transition Segment Detection
383
+ # =============================================================================
384
+
385
+ # Mapping from variant names to their base/display name
386
+ _TRANSITION_BASE_NAMES = {
387
+ "Takbir_double": "Takbir",
388
+ "Tahmeed_combined": "Tahmeed",
389
+ "Tahmeed_response": "Tahmeed",
390
+ }
391
+
392
+
393
+ def detect_transition_segment(
394
+ asr_phonemes: List[str],
395
+ allowed: Optional[set] = None,
396
+ ) -> Tuple[Optional[str], float]:
397
+ """Best-match transition (lowest edit dist under threshold).
398
+
399
+ Compares against all TRANSITION_PHONEMES entries. For entries with variant
400
+ suffixes (e.g. Takbir_double), the returned name is the base name (Takbir)
401
+ — variants only affect internal matching, not display. Best match = lowest
402
+ normalized edit distance.
403
+
404
+ Args:
405
+ asr_phonemes: ASR output phoneme sequence for one segment
406
+ allowed: Optional set of base names to restrict detection to
407
+ (e.g. {"Amin"} to only check Amin)
408
+
409
+ Returns:
410
+ (name, confidence) where name is the base transition name or None,
411
+ and confidence = 1 - normalized_edit_distance.
412
+ """
413
+ if not asr_phonemes:
414
+ return None, 0.0
415
+
416
+ best_name = None
417
+ best_dist = float("inf")
418
+
419
+ for key, ref_phonemes in TRANSITION_PHONEMES.items():
420
+ base_name = _TRANSITION_BASE_NAMES.get(key, key)
421
+ if allowed is not None and base_name not in allowed:
422
+ continue
423
+
424
+ dist = phoneme_edit_distance(asr_phonemes, ref_phonemes)
425
+ if dist < best_dist:
426
+ best_dist = dist
427
+ best_name = base_name
428
+
429
+ if best_dist <= MAX_TRANSITION_EDIT_DISTANCE and best_name is not None:
430
+ return best_name, 1.0 - best_dist
431
+
432
+ return None, 0.0
src/core/segment_types.py CHANGED
@@ -67,6 +67,7 @@ class ProfilingData:
67
  segments_attempted: int = 0
68
  segments_passed: int = 0
69
  special_merges: int = 0
 
70
  # Result building profiling
71
  result_build_time: float = 0.0 # Total result building time
72
  result_audio_encode_time: float = 0.0 # Audio-to-data-URL encoding
@@ -140,6 +141,7 @@ class ProfilingData:
140
  f" Tier 2 Retries: {self.tier2_passed}/{self.tier2_attempts} passed segments: {t2_segs}",
141
  f" Reanchors (consec failures): {self.consec_reanchors}",
142
  f" Special Merges: {self.special_merges}",
 
143
  "-" * 60,
144
  ]
145
  profiled_sum = (self.resample_time + self.vad_wall_time + self.asr_time
 
67
  segments_attempted: int = 0
68
  segments_passed: int = 0
69
  special_merges: int = 0
70
+ transition_skips: int = 0
71
  # Result building profiling
72
  result_build_time: float = 0.0 # Total result building time
73
  result_audio_encode_time: float = 0.0 # Audio-to-data-URL encoding
 
141
  f" Tier 2 Retries: {self.tier2_passed}/{self.tier2_attempts} passed segments: {t2_segs}",
142
  f" Reanchors (consec failures): {self.consec_reanchors}",
143
  f" Special Merges: {self.special_merges}",
144
+ f" Transition Skips: {self.transition_skips}",
145
  "-" * 60,
146
  ]
147
  profiled_sum = (self.resample_time + self.vad_wall_time + self.asr_time
src/core/zero_gpu.py CHANGED
@@ -5,7 +5,6 @@ local or non-ZeroGPU environments.
5
 
6
  import os
7
  import re
8
- import time as _time
9
  import threading
10
  from typing import Callable, TypeVar
11
  from functools import wraps
@@ -18,27 +17,6 @@ ZERO_GPU_AVAILABLE = False
18
  # Per-thread (per-request) GPU state so concurrent requests don't interfere
19
  _request_state = threading.local()
20
 
21
- # ---------------------------------------------------------------------------
22
- # Process-global CUDA health tracking.
23
- # Unlike _request_state (thread-local, per-request), this affects ALL threads.
24
- # When CUDA errors occur we mark it unhealthy so no request attempts GPU
25
- # until the cooldown expires — preventing further poisoning of torch's
26
- # internal CUDA state.
27
- # ---------------------------------------------------------------------------
28
- _cuda_health_lock = threading.Lock()
29
- _cuda_healthy = True
30
- _cuda_unhealthy_since = 0.0 # timestamp of first CUDA failure
31
- _consecutive_cuda_failures = 0 # track persistent failures across cooldowns
32
-
33
- # ---------------------------------------------------------------------------
34
- # SDK worker scheduling failure tracking (separate from CUDA hardware errors).
35
- # Worker errors are transient — the assigned GPU UUID isn't available when
36
- # worker_init runs. Retrying gets a different GPU via a new schedule() call.
37
- # ---------------------------------------------------------------------------
38
- _sdk_worker_lock = threading.Lock()
39
- _sdk_worker_consecutive_failures = 0
40
- _sdk_worker_cooldown_until = 0.0 # timestamp when cooldown expires
41
-
42
  # ---------------------------------------------------------------------------
43
  # Shared RLock for model device transitions AND inference.
44
  # RLock because ensure_models_on_gpu() -> move_phoneme_asr_to_gpu() is a
@@ -56,17 +34,6 @@ _active_gpu_leases = 0
56
  _models_stale = False # Set True at lease end; drained at next lease start
57
 
58
 
59
- # CUDA error patterns that should trigger CPU fallback (not re-raise).
60
- # These indicate hardware/driver issues where retrying GPU won't help.
61
- _CUDA_ERROR_PATTERNS = (
62
- "no cuda gpus are available",
63
- "cuda must not be initialized",
64
- "cuda error", "cuda out of memory",
65
- "cuda driver", "cuda runtime",
66
- "device-side assert", "cublas", "cudnn error", "nccl",
67
- "gpu task aborted", # ZeroGPU SDK wraps CUDA errors with this message
68
- )
69
-
70
  try:
71
  import spaces # type: ignore
72
 
@@ -106,11 +73,6 @@ def _exit_gpu_lease():
106
  _active_gpu_leases = max(0, _active_gpu_leases - 1)
107
 
108
 
109
- def is_gpu_lease_active():
110
- """Check if any thread currently holds a GPU lease."""
111
- return _active_gpu_leases > 0
112
-
113
-
114
  # =========================================================================
115
  # Per-thread state helpers
116
  # =========================================================================
@@ -147,128 +109,6 @@ def force_cpu_mode():
147
  _request_state.user_forced_cpu = True
148
 
149
 
150
- # =========================================================================
151
- # Process-level CUDA health
152
- # =========================================================================
153
-
154
- def _is_cuda_healthy() -> bool:
155
- """Check if CUDA is considered healthy at the process level.
156
-
157
- If unhealthy, checks whether the cooldown has expired.
158
- If expired, allows exactly one retry by marking healthy again.
159
- """
160
- global _cuda_healthy
161
-
162
- if _cuda_healthy:
163
- return True
164
-
165
- from config import CUDA_COOLDOWN_SECONDS
166
- with _cuda_health_lock:
167
- if _cuda_healthy:
168
- return True # Another thread already recovered
169
- elapsed = _time.time() - _cuda_unhealthy_since
170
- if elapsed >= CUDA_COOLDOWN_SECONDS:
171
- print(f"[CUDA HEALTH] Cooldown expired ({elapsed:.0f}s), allowing GPU retry")
172
- _try_reset_cuda_state()
173
- _cuda_healthy = True
174
- return True
175
- return False
176
-
177
-
178
- def _mark_cuda_unhealthy():
179
- """Mark CUDA as unhealthy process-wide after a CUDA error.
180
-
181
- All subsequent requests will skip GPU until cooldown expires.
182
- After MAX_CUDA_FAILURES consecutive failures, restarts the process
183
- (HF Spaces will restart the container automatically).
184
- """
185
- global _cuda_healthy, _cuda_unhealthy_since, _consecutive_cuda_failures
186
- with _cuda_health_lock:
187
- _consecutive_cuda_failures += 1
188
- if _cuda_healthy:
189
- _cuda_unhealthy_since = _time.time()
190
- _cuda_healthy = False
191
- print(f"[CUDA HEALTH] Marked CUDA unhealthy (consecutive failures: {_consecutive_cuda_failures})")
192
-
193
- from config import MAX_CUDA_FAILURES
194
- if _consecutive_cuda_failures >= MAX_CUDA_FAILURES:
195
- print(f"[CUDA HEALTH] {_consecutive_cuda_failures} consecutive CUDA failures — "
196
- "process permanently poisoned, forcing restart")
197
- os._exit(1)
198
-
199
-
200
- def _mark_cuda_success():
201
- """Reset consecutive failure counter after a successful GPU operation."""
202
- global _consecutive_cuda_failures
203
- with _cuda_health_lock:
204
- if _consecutive_cuda_failures > 0:
205
- print(f"[CUDA HEALTH] GPU succeeded, resetting failure counter (was {_consecutive_cuda_failures})")
206
- _consecutive_cuda_failures = 0
207
-
208
-
209
- def _try_reset_cuda_state():
210
- """Reset torch.cuda internal state so a fresh GPU attempt can re-init cleanly.
211
-
212
- After CUDA poisoning, torch.cuda._initialized remains True even though
213
- the underlying CUDA context is dead. Resetting it allows the next
214
- torch.cuda call to re-initialize from scratch.
215
- """
216
- try:
217
- import torch.cuda as _cuda
218
- if getattr(_cuda, '_initialized', False):
219
- _cuda._initialized = False
220
- print("[CUDA HEALTH] Reset torch.cuda._initialized")
221
- if hasattr(_cuda, '_queued_calls'):
222
- _cuda._queued_calls.clear()
223
- except Exception as e:
224
- print(f"[CUDA HEALTH] CUDA state reset failed (non-fatal): {e}")
225
-
226
-
227
- # =========================================================================
228
- # SDK worker scheduling health
229
- # =========================================================================
230
-
231
- def _record_sdk_worker_failure():
232
- """Record a failed SDK worker retry. After threshold → enter cooldown."""
233
- global _sdk_worker_consecutive_failures, _sdk_worker_cooldown_until
234
- from config import SDK_WORKER_COOLDOWN_SECONDS, SDK_WORKER_FAILURE_THRESHOLD
235
- with _sdk_worker_lock:
236
- _sdk_worker_consecutive_failures += 1
237
- if _sdk_worker_consecutive_failures >= SDK_WORKER_FAILURE_THRESHOLD:
238
- _sdk_worker_cooldown_until = _time.time() + SDK_WORKER_COOLDOWN_SECONDS
239
- print(f"[GPU] SDK worker: {_sdk_worker_consecutive_failures} consecutive failures, "
240
- f"cooldown for {SDK_WORKER_COOLDOWN_SECONDS}s")
241
-
242
-
243
- def _reset_sdk_worker_failures():
244
- """Reset SDK worker failure counter after a successful GPU operation."""
245
- global _sdk_worker_consecutive_failures, _sdk_worker_cooldown_until
246
- with _sdk_worker_lock:
247
- if _sdk_worker_consecutive_failures > 0:
248
- print(f"[GPU] SDK worker: GPU succeeded, resetting failure counter "
249
- f"(was {_sdk_worker_consecutive_failures})")
250
- _sdk_worker_consecutive_failures = 0
251
- _sdk_worker_cooldown_until = 0.0
252
-
253
-
254
- def _is_sdk_worker_healthy() -> bool:
255
- """Check if SDK worker scheduling is healthy (not in cooldown).
256
-
257
- Returns True if no cooldown active or cooldown has expired.
258
- Auto-resets on expiry so the next attempt can retry GPU.
259
- """
260
- global _sdk_worker_cooldown_until
261
- with _sdk_worker_lock:
262
- if _sdk_worker_cooldown_until <= 0.0:
263
- return True
264
- now = _time.time()
265
- if now >= _sdk_worker_cooldown_until:
266
- print("[GPU] SDK worker cooldown expired, allowing GPU retry")
267
- _sdk_worker_cooldown_until = 0.0
268
- return True
269
- return False
270
-
271
-
272
  # =========================================================================
273
  # Model cleanup helpers
274
  # =========================================================================
@@ -325,6 +165,11 @@ def gpu_with_fallback(duration=60):
325
  The model_device_lock is held for the ENTIRE GPU lease (inference +
326
  cleanup) to prevent concurrent threads from moving models mid-inference.
327
 
 
 
 
 
 
328
  Usage:
329
  @gpu_with_fallback(duration=60)
330
  def my_gpu_func(data):
@@ -341,10 +186,7 @@ def gpu_with_fallback(duration=60):
341
  with model_device_lock:
342
  try:
343
  _drain_stale_models()
344
- result = func(*args, **kwargs)
345
- _mark_cuda_success()
346
- _reset_sdk_worker_failures()
347
- return result
348
  finally:
349
  try:
350
  _cleanup_after_gpu()
@@ -373,105 +215,34 @@ def gpu_with_fallback(duration=60):
373
  print("[GPU] Quota exhausted, using CPU fallback")
374
  return func(*args, **kwargs)
375
 
376
- # If CUDA is unhealthy process-wide, skip GPU to prevent
377
- # further poisoning of torch's internal CUDA state
378
- if not _is_cuda_healthy():
379
- from config import CUDA_COOLDOWN_SECONDS
380
- remaining = CUDA_COOLDOWN_SECONDS - (_time.time() - _cuda_unhealthy_since)
381
- print(f"[CUDA HEALTH] CUDA unhealthy, skipping GPU (retry in {remaining:.0f}s)")
382
- _request_state.gpu_quota_exhausted = True
383
- try:
384
- import gradio as gr
385
- gr.Warning(f"GPU temporarily unavailable — using CPU. Retry in {max(1, int(remaining / 60))}m.")
386
- except Exception:
387
- pass
388
- return func(*args, **kwargs)
389
-
390
- # If SDK worker scheduling is in cooldown, skip GPU
391
- if not _is_sdk_worker_healthy():
392
- print("[GPU] SDK worker in cooldown, skipping GPU")
393
- _request_state.gpu_quota_exhausted = True
394
- try:
395
- import gradio as gr
396
- gr.Warning("GPU temporarily unavailable — using CPU (slower).")
397
- except Exception:
398
- pass
399
- return func(*args, **kwargs)
400
-
401
- # Try GPU first
402
  try:
403
  return gpu_func(*args, **kwargs)
404
  except Exception as e:
405
- print(f"[GPU] gpu_func error: {type(e).__name__}: {e}")
406
- # ZeroGPU raises gradio.Error with title="ZeroGPU quota exceeded"
 
 
407
  is_quota_error = getattr(e, 'title', '') == "ZeroGPU quota exceeded"
408
  if not is_quota_error:
409
- err_str = str(e).lower()
410
- is_quota_error = 'quota' in err_str and ('exceeded' in err_str or 'exhausted' in err_str)
411
 
412
  if is_quota_error:
413
  print(f"[GPU] Quota exceeded (user-level), falling back to CPU: {e}")
414
  _request_state.gpu_quota_exhausted = True
415
- # Parse reset time from message like "Try again in 13:53:59"
416
- match = re.search(r'Try again in (\d+:\d{2}:\d{2})', str(e))
417
  if match:
418
  _request_state.quota_reset_time = match.group(1)
419
- # NOT setting process-global flag: quota is per-user,
420
- # other users may still have quota available.
421
- # Show immediate toast notification
422
  try:
423
  import gradio as gr
424
  reset_time = get_quota_reset_time()
425
  reset_msg = f" Resets in {reset_time}." if reset_time else ""
426
  gr.Warning(f"GPU quota reached — switching to CPU (slower).{reset_msg}")
427
- except Exception:
428
- pass # Not in a Gradio context (e.g., CLI usage)
429
- return func(*args, **kwargs)
430
-
431
- # Check for CUDA hardware/driver errors (e.g. worker_init failure)
432
- err_lower = str(e).lower()
433
- is_cuda_error = any(p in err_lower for p in _CUDA_ERROR_PATTERNS)
434
-
435
- # SDK wraps worker_init failures as gradio.Error(title="ZeroGPU worker error")
436
- # with message = just the exception class name. Original CUDA message is lost.
437
- is_sdk_worker_error = False
438
- if not is_cuda_error:
439
- err_title = getattr(e, 'title', '') or ''
440
- is_sdk_worker_error = 'worker' in err_title.lower() and 'error' in err_title.lower()
441
-
442
- if is_cuda_error:
443
- print(f"[GPU] CUDA error, falling back to CPU: {e}")
444
- _mark_cuda_unhealthy()
445
- _request_state.gpu_quota_exhausted = True
446
- try:
447
- import gradio as gr
448
- gr.Warning("GPU hardware error — switching to CPU (slower).")
449
- except Exception:
450
- pass
451
- return func(*args, **kwargs)
452
-
453
- if is_sdk_worker_error:
454
- # worker_init failed — the assigned GPU UUID wasn't available.
455
- # This is transient: retrying calls client.schedule() which
456
- # assigns a different GPU. No sleep needed.
457
- print(f"[GPU] SDK worker error, retrying GPU (new schedule)...")
458
- try:
459
- result = gpu_func(*args, **kwargs)
460
- _mark_cuda_success()
461
- _reset_sdk_worker_failures()
462
- return result
463
- except Exception as retry_e:
464
- print(f"[GPU] GPU retry also failed: {retry_e}")
465
- _record_sdk_worker_failure()
466
- # Both attempts failed — fall through to CPU
467
- _request_state.gpu_quota_exhausted = True
468
- try:
469
- import gradio as gr
470
- gr.Warning("GPU temporarily unavailable — using CPU (slower).")
471
  except Exception:
472
  pass
473
  return func(*args, **kwargs)
474
 
 
475
  is_timeout = (
476
  'timeout' in err_lower
477
  or 'duration' in err_lower
@@ -481,19 +252,11 @@ def gpu_with_fallback(duration=60):
481
  print(f"[GPU] Timeout error in {func.__name__}: {e}")
482
  raise
483
 
484
- # Unrecognized GPU/SDK error fall back to CPU for THIS REQUEST only.
485
- # Do NOT mark CUDA unhealthy: unknown errors are often transient SDK
486
- # issues (worker scheduling, network, etc.) that don't indicate CUDA
487
- # poisoning. Only genuine CUDA errors (matched above) should trigger
488
- # process-wide health flags that block other users.
489
- print(f"[GPU] Unrecognized GPU error, falling back to CPU: {type(e).__name__}: {e}")
490
- _request_state.gpu_quota_exhausted = True
491
- try:
492
- import gradio as gr
493
- gr.Warning("GPU error — using CPU (slower).")
494
- except Exception:
495
- pass
496
- return func(*args, **kwargs)
497
 
498
  return wrapper
499
  return decorator
 
5
 
6
  import os
7
  import re
 
8
  import threading
9
  from typing import Callable, TypeVar
10
  from functools import wraps
 
17
  # Per-thread (per-request) GPU state so concurrent requests don't interfere
18
  _request_state = threading.local()
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  # ---------------------------------------------------------------------------
21
  # Shared RLock for model device transitions AND inference.
22
  # RLock because ensure_models_on_gpu() -> move_phoneme_asr_to_gpu() is a
 
34
  _models_stale = False # Set True at lease end; drained at next lease start
35
 
36
 
 
 
 
 
 
 
 
 
 
 
 
37
  try:
38
  import spaces # type: ignore
39
 
 
73
  _active_gpu_leases = max(0, _active_gpu_leases - 1)
74
 
75
 
 
 
 
 
 
76
  # =========================================================================
77
  # Per-thread state helpers
78
  # =========================================================================
 
109
  _request_state.user_forced_cpu = True
110
 
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  # =========================================================================
113
  # Model cleanup helpers
114
  # =========================================================================
 
165
  The model_device_lock is held for the ENTIRE GPU lease (inference +
166
  cleanup) to prevent concurrent threads from moving models mid-inference.
167
 
168
+ Error handling strategy:
169
+ - Quota exhaustion → CPU fallback (per-user, not process issue)
170
+ - Timeout → propagate to caller
171
+ - Any other GPU error → os._exit(1) for clean process restart
172
+
173
  Usage:
174
  @gpu_with_fallback(duration=60)
175
  def my_gpu_func(data):
 
186
  with model_device_lock:
187
  try:
188
  _drain_stale_models()
189
+ return func(*args, **kwargs)
 
 
 
190
  finally:
191
  try:
192
  _cleanup_after_gpu()
 
215
  print("[GPU] Quota exhausted, using CPU fallback")
216
  return func(*args, **kwargs)
217
 
218
+ # Try GPU
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  try:
220
  return gpu_func(*args, **kwargs)
221
  except Exception as e:
222
+ err_str = str(e)
223
+ err_lower = err_str.lower()
224
+
225
+ # Quota exhaustion → CPU fallback (per-user, not process issue)
226
  is_quota_error = getattr(e, 'title', '') == "ZeroGPU quota exceeded"
227
  if not is_quota_error:
228
+ is_quota_error = 'quota' in err_lower and ('exceeded' in err_lower or 'exhausted' in err_lower)
 
229
 
230
  if is_quota_error:
231
  print(f"[GPU] Quota exceeded (user-level), falling back to CPU: {e}")
232
  _request_state.gpu_quota_exhausted = True
233
+ match = re.search(r'Try again in (\d+:\d{2}:\d{2})', err_str)
 
234
  if match:
235
  _request_state.quota_reset_time = match.group(1)
 
 
 
236
  try:
237
  import gradio as gr
238
  reset_time = get_quota_reset_time()
239
  reset_msg = f" Resets in {reset_time}." if reset_time else ""
240
  gr.Warning(f"GPU quota reached — switching to CPU (slower).{reset_msg}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  except Exception:
242
  pass
243
  return func(*args, **kwargs)
244
 
245
+ # Timeout → propagate to caller
246
  is_timeout = (
247
  'timeout' in err_lower
248
  or 'duration' in err_lower
 
252
  print(f"[GPU] Timeout error in {func.__name__}: {e}")
253
  raise
254
 
255
+ # ANY other GPU error → process is poisoned, kill immediately.
256
+ # HF Spaces auto-restarts the container with a fresh process.
257
+ print(f"[GPU] Fatal GPU error: {type(e).__name__}: {e}")
258
+ print("[GPU] Restarting process to recover clean GPU state...")
259
+ os._exit(1)
 
 
 
 
 
 
 
 
260
 
261
  return wrapper
262
  return decorator
src/pipeline.py CHANGED
@@ -174,8 +174,12 @@ def _run_post_vad_pipeline(
174
 
175
  # If segments were split (combined Isti'adha+Basmala), pad phoneme_texts
176
  # with empty placeholders so indices stay aligned.
 
 
 
177
  if len(vad_segments) != len(phoneme_texts):
178
- phoneme_texts = [[], []] + phoneme_texts[1:]
 
179
 
180
  # Anchor detection via phoneme n-gram voting
181
  progress(*progress_steps["anchor"])
@@ -206,7 +210,7 @@ def _run_post_vad_pipeline(
206
 
207
  # Phoneme-based DP alignment
208
  match_start = time.time()
209
- match_results, match_profiling, gap_segments = run_phoneme_matching(
210
  phoneme_texts,
211
  surah,
212
  first_quran_idx,
@@ -237,6 +241,7 @@ def _run_post_vad_pipeline(
237
  profiling.tier2_segments = match_profiling.get("tier2_segments", [])
238
  profiling.consec_reanchors = match_profiling.get("consec_reanchors", 0)
239
  profiling.special_merges = match_profiling.get("special_merges", 0)
 
240
  profiling.segments_attempted = match_profiling.get("segments_attempted", 0)
241
  profiling.segments_passed = match_profiling.get("segments_passed", 0)
242
 
@@ -268,9 +273,19 @@ def _run_post_vad_pipeline(
268
  _underseg_by_words: list[int] = []
269
  _underseg_by_ayah: list[int] = []
270
 
 
 
 
 
 
 
271
  for idx, (seg, (matched_text, score, matched_ref)) in enumerate(
272
  zip(vad_segments, match_results)
273
  ):
 
 
 
 
274
  if idx == last_display_idx and matched_ref:
275
  if not is_end_of_verse(matched_ref):
276
  score = max(0.0, score - 0.25)
@@ -283,13 +298,15 @@ def _run_post_vad_pipeline(
283
  matched_ref = ""
284
  error = f"Low confidence ({score:.0%})"
285
 
286
- duration = seg.end_time - seg.start_time
 
 
287
  word_count, ayah_span = get_segment_word_stats(matched_ref)
288
  underseg = check_undersegmented(matched_ref, duration)
289
 
290
  segments.append(SegmentInfo(
291
  start_time=seg.start_time,
292
- end_time=seg.end_time,
293
  transcribed_text=phoneme_text,
294
  matched_text=matched_text,
295
  matched_ref=matched_ref,
 
174
 
175
  # If segments were split (combined Isti'adha+Basmala), pad phoneme_texts
176
  # with empty placeholders so indices stay aligned.
177
+ # The split replaces one segment with two, so vad_segments is 1 longer.
178
+ # Insert an empty placeholder at the split position (first_quran_idx - 2,
179
+ # where the combined segment was; alternatively one could find the gap).
180
  if len(vad_segments) != len(phoneme_texts):
181
+ split_idx = first_quran_idx - 2 # Combined was split into 2 entries starting here
182
+ phoneme_texts = phoneme_texts[:split_idx] + [[], []] + phoneme_texts[split_idx + 1:]
183
 
184
  # Anchor detection via phoneme n-gram voting
185
  progress(*progress_steps["anchor"])
 
210
 
211
  # Phoneme-based DP alignment
212
  match_start = time.time()
213
+ match_results, match_profiling, gap_segments, merged_into = run_phoneme_matching(
214
  phoneme_texts,
215
  surah,
216
  first_quran_idx,
 
241
  profiling.tier2_segments = match_profiling.get("tier2_segments", [])
242
  profiling.consec_reanchors = match_profiling.get("consec_reanchors", 0)
243
  profiling.special_merges = match_profiling.get("special_merges", 0)
244
+ profiling.transition_skips = match_profiling.get("transition_skips", 0)
245
  profiling.segments_attempted = match_profiling.get("segments_attempted", 0)
246
  profiling.segments_passed = match_profiling.get("segments_passed", 0)
247
 
 
273
  _underseg_by_words: list[int] = []
274
  _underseg_by_ayah: list[int] = []
275
 
276
+ # Pre-compute merged end times: extend target segment's end_time
277
+ _merged_end_times = {} # {target_idx: extended_end_time}
278
+ for consumed_idx, target_idx in merged_into.items():
279
+ if consumed_idx < len(vad_segments):
280
+ _merged_end_times[target_idx] = vad_segments[consumed_idx].end_time
281
+
282
  for idx, (seg, (matched_text, score, matched_ref)) in enumerate(
283
  zip(vad_segments, match_results)
284
  ):
285
+ # Skip segments consumed by Tahmeed merge
286
+ if idx in merged_into:
287
+ continue
288
+
289
  if idx == last_display_idx and matched_ref:
290
  if not is_end_of_verse(matched_ref):
291
  score = max(0.0, score - 0.25)
 
298
  matched_ref = ""
299
  error = f"Low confidence ({score:.0%})"
300
 
301
+ # Extend end_time if this segment absorbed a merged segment
302
+ seg_end_time = _merged_end_times.get(idx, seg.end_time)
303
+ duration = seg_end_time - seg.start_time
304
  word_count, ayah_span = get_segment_word_stats(matched_ref)
305
  underseg = check_undersegmented(matched_ref, duration)
306
 
307
  segments.append(SegmentInfo(
308
  start_time=seg.start_time,
309
+ end_time=seg_end_time,
310
  transcribed_text=phoneme_text,
311
  matched_text=matched_text,
312
  matched_ref=matched_ref,
src/ui/segments.py CHANGED
@@ -16,6 +16,7 @@ from config import (
16
  SURAH_INFO_PATH,
17
  )
18
  from src.core.segment_types import SegmentInfo
 
19
 
20
 
21
  def format_timestamp(seconds: float) -> str:
@@ -237,11 +238,14 @@ def render_segment_card(seg: SegmentInfo, idx: int, audio_int16: np.ndarray = No
237
  render_key: Unique key to prevent browser caching between renders
238
  segment_dir: Directory to write segment WAV files into
239
  """
 
240
  confidence_class = get_confidence_class(seg.match_score)
241
  confidence_badge_class = confidence_class # preserve original for badge color
242
- if seg.has_missing_words:
 
 
243
  confidence_class = "segment-low"
244
- if seg.potentially_undersegmented and confidence_class != "segment-low":
245
  confidence_class = "segment-underseg"
246
 
247
  timestamp = f"{format_timestamp(seg.start_time)} - {format_timestamp(seg.end_time)}"
@@ -330,7 +334,12 @@ def render_segment_card(seg: SegmentInfo, idx: int, audio_int16: np.ndarray = No
330
  else:
331
  text_html = ""
332
 
333
- confidence_badge = "" if seg.has_missing_words else f'<div class="segment-badge {confidence_badge_class}-badge">{confidence_pct}</div>'
 
 
 
 
 
334
 
335
  # Build inline header: Segment N | ref | duration | time range
336
  header_parts = [f"Segment {idx + 1}"]
@@ -389,9 +398,11 @@ def render_segments(segments: list, audio_int16: np.ndarray = None, sample_rate:
389
  wf.writeframes(audio_int16.tobytes())
390
  full_audio_url = f"/gradio_api/file={full_path}"
391
 
392
- # Categorize segments by confidence level (1-indexed for display)
393
- med_segments = [i + 1 for i, s in enumerate(segments) if CONFIDENCE_MED <= s.match_score < CONFIDENCE_HIGH]
394
- low_segments = [i + 1 for i, s in enumerate(segments) if s.match_score < CONFIDENCE_MED]
 
 
395
 
396
  # Build header with confidence summary
397
  header_parts = []
 
16
  SURAH_INFO_PATH,
17
  )
18
  from src.core.segment_types import SegmentInfo
19
+ from src.alignment.special_segments import ALL_SPECIAL_REFS
20
 
21
 
22
  def format_timestamp(seconds: float) -> str:
 
238
  render_key: Unique key to prevent browser caching between renders
239
  segment_dir: Directory to write segment WAV files into
240
  """
241
+ is_special = seg.matched_ref in ALL_SPECIAL_REFS
242
  confidence_class = get_confidence_class(seg.match_score)
243
  confidence_badge_class = confidence_class # preserve original for badge color
244
+ if is_special:
245
+ confidence_class = "segment-special"
246
+ elif seg.has_missing_words:
247
  confidence_class = "segment-low"
248
+ elif seg.potentially_undersegmented and confidence_class != "segment-low":
249
  confidence_class = "segment-underseg"
250
 
251
  timestamp = f"{format_timestamp(seg.start_time)} - {format_timestamp(seg.end_time)}"
 
334
  else:
335
  text_html = ""
336
 
337
+ if is_special:
338
+ confidence_badge = f'<div class="segment-badge segment-special-badge">{seg.matched_ref}</div>'
339
+ elif seg.has_missing_words:
340
+ confidence_badge = ""
341
+ else:
342
+ confidence_badge = f'<div class="segment-badge {confidence_badge_class}-badge">{confidence_pct}</div>'
343
 
344
  # Build inline header: Segment N | ref | duration | time range
345
  header_parts = [f"Segment {idx + 1}"]
 
398
  wf.writeframes(audio_int16.tobytes())
399
  full_audio_url = f"/gradio_api/file={full_path}"
400
 
401
+ # Categorize segments by confidence level (1-indexed for display), excluding specials
402
+ med_segments = [i + 1 for i, s in enumerate(segments)
403
+ if CONFIDENCE_MED <= s.match_score < CONFIDENCE_HIGH and s.matched_ref not in ALL_SPECIAL_REFS]
404
+ low_segments = [i + 1 for i, s in enumerate(segments)
405
+ if s.match_score < CONFIDENCE_MED and s.matched_ref not in ALL_SPECIAL_REFS]
406
 
407
  # Build header with confidence summary
408
  header_parts = []
src/ui/styles.py CHANGED
@@ -400,6 +400,8 @@ def build_css() -> str:
400
  .segment-low-badge {{ background: #dc3545; }}
401
  .segment-underseg {{ background: #ffe5cc; border-color: #ff8c00; }}
402
  .segment-underseg-badge {{ background: #ff8c00; }}
 
 
403
 
404
  /* Review summary text colors */
405
  .segments-review-summary {{ margin-bottom: 8px; font-size: 14px; }}
@@ -421,11 +423,13 @@ def build_css() -> str:
421
  .segment-med {{ background: rgba(255, 193, 7, 0.2); border-color: #ffc107; }}
422
  .segment-low {{ background: rgba(220, 53, 69, 0.2); border-color: #dc3545; }}
423
  .segment-underseg {{ background: rgba(255, 140, 0, 0.2); border-color: #ff8c00; }}
 
424
  }}
425
  /* Also support Gradio's dark class */
426
  .dark .segment-high {{ background: rgba(40, 167, 69, 0.2); border-color: #28a745; }}
427
  .dark .segment-med {{ background: rgba(255, 193, 7, 0.2); border-color: #ffc107; }}
428
  .dark .segment-low {{ background: rgba(220, 53, 69, 0.2); border-color: #dc3545; }}
429
  .dark .segment-underseg {{ background: rgba(255, 140, 0, 0.2); border-color: #ff8c00; }}
 
430
 
431
  """
 
400
  .segment-low-badge {{ background: #dc3545; }}
401
  .segment-underseg {{ background: #ffe5cc; border-color: #ff8c00; }}
402
  .segment-underseg-badge {{ background: #ff8c00; }}
403
+ .segment-special {{ background: #e8eaf6; border-color: #5c6bc0; border-style: dashed; }}
404
+ .segment-special-badge {{ background: #5c6bc0; }}
405
 
406
  /* Review summary text colors */
407
  .segments-review-summary {{ margin-bottom: 8px; font-size: 14px; }}
 
423
  .segment-med {{ background: rgba(255, 193, 7, 0.2); border-color: #ffc107; }}
424
  .segment-low {{ background: rgba(220, 53, 69, 0.2); border-color: #dc3545; }}
425
  .segment-underseg {{ background: rgba(255, 140, 0, 0.2); border-color: #ff8c00; }}
426
+ .segment-special {{ background: rgba(92, 107, 192, 0.2); border-color: #5c6bc0; border-style: dashed; }}
427
  }}
428
  /* Also support Gradio's dark class */
429
  .dark .segment-high {{ background: rgba(40, 167, 69, 0.2); border-color: #28a745; }}
430
  .dark .segment-med {{ background: rgba(255, 193, 7, 0.2); border-color: #ffc107; }}
431
  .dark .segment-low {{ background: rgba(220, 53, 69, 0.2); border-color: #dc3545; }}
432
  .dark .segment-underseg {{ background: rgba(255, 140, 0, 0.2); border-color: #ff8c00; }}
433
+ .dark .segment-special {{ background: rgba(92, 107, 192, 0.2); border-color: #5c6bc0; border-style: dashed; }}
434
 
435
  """