Spaces:
Running on Zero
Running on Zero
Add transition segment detection for non-Quranic phrases (Amin, Takbir, Tahmeed)
Detect and render Amin (after Al-Fatiha), Takbir (Allahu Akbar), and
Tahmeed (Sami'a Allahu liman hamidah) as special segment cards with
distinct styling. Includes Takbir-at-start detection before Isti'adha/
Basmala, transition mode state machine for consecutive non-Quranic
segments, Tahmeed peek-ahead merge for split sami'a+rabbana segments,
and unified special segment rendering with name badges.
Also simplify GPU error handling: replace per-error-type fallback with
immediate process restart for non-quota GPU errors.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- config.py +1 -11
- src/alignment/alignment_pipeline.py +119 -7
- src/alignment/special_segments.py +170 -33
- src/core/segment_types.py +2 -0
- src/core/zero_gpu.py +19 -256
- src/pipeline.py +21 -4
- src/ui/segments.py +17 -6
- src/ui/styles.py +4 -0
config.py
CHANGED
|
@@ -133,6 +133,7 @@ LOOKBACK_WORDS = 30 # Window words to look back from pointer for
|
|
| 133 |
LOOKAHEAD_WORDS = 10 # Window words to look ahead after expected end position
|
| 134 |
MAX_EDIT_DISTANCE = 0.25 # Max normalized edit distance for valid ayah match
|
| 135 |
MAX_SPECIAL_EDIT_DISTANCE = 0.35 # Max normalized edit distance for Basmala/Isti'adha detection
|
|
|
|
| 136 |
START_PRIOR_WEIGHT = 0.005 # Penalty per word away from expected position
|
| 137 |
|
| 138 |
# Failed Segments
|
|
@@ -234,17 +235,6 @@ PROGRESS_RETRANSCRIBE = {
|
|
| 234 |
|
| 235 |
MFA_PROGRESS_SEGMENT_RATE = 0.05 # seconds per segment for progress bar animation
|
| 236 |
|
| 237 |
-
# =============================================================================
|
| 238 |
-
# CUDA health monitoring (ZeroGPU poisoning protection)
|
| 239 |
-
# =============================================================================
|
| 240 |
-
|
| 241 |
-
CUDA_COOLDOWN_SECONDS = 30 # Seconds before retrying GPU after CUDA error
|
| 242 |
-
MAX_CUDA_FAILURES = 5 # Consecutive CUDA failures before process restart
|
| 243 |
-
|
| 244 |
-
# SDK worker scheduling errors (transient — different from CUDA hardware errors)
|
| 245 |
-
SDK_WORKER_COOLDOWN_SECONDS = 15 # Very short — scheduler issues resolve fast
|
| 246 |
-
SDK_WORKER_FAILURE_THRESHOLD = 2 # After 2 consecutive failed retries → cooldown
|
| 247 |
-
|
| 248 |
# =============================================================================
|
| 249 |
# UI settings
|
| 250 |
# =============================================================================
|
|
|
|
| 133 |
LOOKAHEAD_WORDS = 10 # Window words to look ahead after expected end position
|
| 134 |
MAX_EDIT_DISTANCE = 0.25 # Max normalized edit distance for valid ayah match
|
| 135 |
MAX_SPECIAL_EDIT_DISTANCE = 0.35 # Max normalized edit distance for Basmala/Isti'adha detection
|
| 136 |
+
MAX_TRANSITION_EDIT_DISTANCE = 0.35 # Max normalized edit distance for transition segments (Amin/Takbir/Tahmeed)
|
| 137 |
START_PRIOR_WEIGHT = 0.005 # Penalty per word away from expected position
|
| 138 |
|
| 139 |
# Failed Segments
|
|
|
|
| 235 |
|
| 236 |
MFA_PROGRESS_SEGMENT_RATE = 0.05 # seconds per segment for progress bar animation
|
| 237 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
# =============================================================================
|
| 239 |
# UI settings
|
| 240 |
# =============================================================================
|
src/alignment/alignment_pipeline.py
CHANGED
|
@@ -18,7 +18,7 @@ def run_phoneme_matching(
|
|
| 18 |
first_quran_idx: int = 0,
|
| 19 |
special_results: List[tuple] = None,
|
| 20 |
start_pointer: int = 0,
|
| 21 |
-
) -> Tuple[List[tuple], dict, set]:
|
| 22 |
"""
|
| 23 |
Phoneme-based segment matching using substring DP.
|
| 24 |
|
|
@@ -30,8 +30,9 @@ def run_phoneme_matching(
|
|
| 30 |
start_pointer: Initial word pointer from anchor voting
|
| 31 |
|
| 32 |
Returns:
|
| 33 |
-
(results, profiling_dict, gap_segments)
|
| 34 |
results: List[(matched_text, score, matched_ref), ...]
|
|
|
|
| 35 |
"""
|
| 36 |
from .phoneme_matcher import align_segment, get_matched_text
|
| 37 |
from .phoneme_matcher_cache import get_chapter_reference
|
|
@@ -62,7 +63,10 @@ def run_phoneme_matching(
|
|
| 62 |
result_build_total = 0.0
|
| 63 |
|
| 64 |
# Track whether the next segment might have Basmala fused with verse content
|
| 65 |
-
from .special_segments import
|
|
|
|
|
|
|
|
|
|
| 66 |
basmala_already_detected = any(
|
| 67 |
r[2] in ("Basmala", "Isti'adha+Basmala") for r in (special_results or [])
|
| 68 |
)
|
|
@@ -70,6 +74,12 @@ def run_phoneme_matching(
|
|
| 70 |
|
| 71 |
special_merges = 0
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
# Gap tracking (initialized here so inline chapter-transition checks can add entries)
|
| 74 |
gap_segments = set()
|
| 75 |
transition_expected_pointer = -1 # -1 = no pending check
|
|
@@ -111,9 +121,60 @@ def run_phoneme_matching(
|
|
| 111 |
skip_count -= 1
|
| 112 |
continue
|
| 113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
segment_idx = first_quran_idx + i + 1 # 1-indexed for display
|
| 115 |
segments_attempted += 1
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
alignment, timing = align_segment(asr_phonemes, chapter_ref, pointer, segment_idx)
|
| 118 |
num_segments += 1
|
| 119 |
|
|
@@ -125,8 +186,22 @@ def run_phoneme_matching(
|
|
| 125 |
|
| 126 |
# Chapter transition: pointer past end of chapter
|
| 127 |
if alignment is None and pointer >= chapter_ref.num_words:
|
| 128 |
-
from .special_segments import detect_inter_chapter_specials
|
| 129 |
remaining_phonemes = phoneme_texts[first_quran_idx + i:]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
inter_specials, num_consumed = detect_inter_chapter_specials(remaining_phonemes)
|
| 131 |
|
| 132 |
if chapter_ref.surah == 1:
|
|
@@ -134,8 +209,8 @@ def run_phoneme_matching(
|
|
| 134 |
print(f" [CHAPTER-END] Surah 1 complete at segment {segment_idx}, "
|
| 135 |
f"running global reanchor...")
|
| 136 |
|
| 137 |
-
# Use segments after specials for anchor voting
|
| 138 |
-
anchor_offset = first_quran_idx + i + num_consumed
|
| 139 |
anchor_remaining = phoneme_texts[anchor_offset:]
|
| 140 |
|
| 141 |
reanchor_surah, reanchor_ayah = find_anchor_by_voting(
|
|
@@ -171,6 +246,18 @@ def run_phoneme_matching(
|
|
| 171 |
detected_surah = next_surah
|
| 172 |
consecutive_failures = 0
|
| 173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
if num_consumed > 0:
|
| 175 |
has_basmala = any(s[2] in ("Basmala", "Isti'adha+Basmala") for s in inter_specials)
|
| 176 |
is_first_after_transition = not has_basmala
|
|
@@ -237,6 +324,29 @@ def run_phoneme_matching(
|
|
| 237 |
_check_transition_gap(alignment.start_word_idx)
|
| 238 |
segments_passed += 1
|
| 239 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
# === Graduated retry ===
|
| 241 |
# Tier 1: expanded window, same threshold
|
| 242 |
tier1_attempts += 1
|
|
@@ -381,6 +491,7 @@ def run_phoneme_matching(
|
|
| 381 |
"segments_attempted": segments_attempted,
|
| 382 |
"segments_passed": segments_passed,
|
| 383 |
"special_merges": special_merges,
|
|
|
|
| 384 |
}
|
| 385 |
else:
|
| 386 |
profiling = {
|
|
@@ -395,6 +506,7 @@ def run_phoneme_matching(
|
|
| 395 |
"segments_attempted": segments_attempted,
|
| 396 |
"segments_passed": segments_passed,
|
| 397 |
"special_merges": special_merges,
|
|
|
|
| 398 |
}
|
| 399 |
|
| 400 |
-
return results, profiling, gap_segments
|
|
|
|
| 18 |
first_quran_idx: int = 0,
|
| 19 |
special_results: List[tuple] = None,
|
| 20 |
start_pointer: int = 0,
|
| 21 |
+
) -> Tuple[List[tuple], dict, set, dict]:
|
| 22 |
"""
|
| 23 |
Phoneme-based segment matching using substring DP.
|
| 24 |
|
|
|
|
| 30 |
start_pointer: Initial word pointer from anchor voting
|
| 31 |
|
| 32 |
Returns:
|
| 33 |
+
(results, profiling_dict, gap_segments, merged_into)
|
| 34 |
results: List[(matched_text, score, matched_ref), ...]
|
| 35 |
+
merged_into: dict mapping consumed segment indices to their target segment index
|
| 36 |
"""
|
| 37 |
from .phoneme_matcher import align_segment, get_matched_text
|
| 38 |
from .phoneme_matcher_cache import get_chapter_reference
|
|
|
|
| 63 |
result_build_total = 0.0
|
| 64 |
|
| 65 |
# Track whether the next segment might have Basmala fused with verse content
|
| 66 |
+
from .special_segments import (
|
| 67 |
+
SPECIAL_PHONEMES, SPECIAL_TEXT, TRANSITION_TEXT,
|
| 68 |
+
detect_transition_segment, detect_inter_chapter_specials,
|
| 69 |
+
)
|
| 70 |
basmala_already_detected = any(
|
| 71 |
r[2] in ("Basmala", "Isti'adha+Basmala") for r in (special_results or [])
|
| 72 |
)
|
|
|
|
| 74 |
|
| 75 |
special_merges = 0
|
| 76 |
|
| 77 |
+
# Transition segment state
|
| 78 |
+
transition_mode = False
|
| 79 |
+
transition_skips = 0
|
| 80 |
+
tahmeed_merge_skip = 0
|
| 81 |
+
merged_into = {} # {consumed_idx: target_idx}
|
| 82 |
+
|
| 83 |
# Gap tracking (initialized here so inline chapter-transition checks can add entries)
|
| 84 |
gap_segments = set()
|
| 85 |
transition_expected_pointer = -1 # -1 = no pending check
|
|
|
|
| 121 |
skip_count -= 1
|
| 122 |
continue
|
| 123 |
|
| 124 |
+
# Handle segments consumed by Tahmeed merge (sami'a + rabbana in separate segments)
|
| 125 |
+
if tahmeed_merge_skip > 0:
|
| 126 |
+
# This segment's audio was merged into the previous Tahmeed segment
|
| 127 |
+
results.append(("", 0.0, ""))
|
| 128 |
+
word_indices.append(None)
|
| 129 |
+
tahmeed_merge_skip -= 1
|
| 130 |
+
transition_skips += 1
|
| 131 |
+
continue
|
| 132 |
+
|
| 133 |
segment_idx = first_quran_idx + i + 1 # 1-indexed for display
|
| 134 |
segments_attempted += 1
|
| 135 |
|
| 136 |
+
# Transition mode: keep checking for transitions before trying alignment
|
| 137 |
+
if transition_mode:
|
| 138 |
+
trans_name, trans_conf = detect_transition_segment(asr_phonemes)
|
| 139 |
+
if trans_name:
|
| 140 |
+
print(f" [TRANSITION-MODE] Segment {segment_idx}: {trans_name} (conf={trans_conf:.2f})")
|
| 141 |
+
results.append((TRANSITION_TEXT[trans_name], trans_conf, trans_name))
|
| 142 |
+
word_indices.append(None)
|
| 143 |
+
transition_skips += 1
|
| 144 |
+
|
| 145 |
+
# Tahmeed peek-ahead for merge
|
| 146 |
+
if trans_name == "Tahmeed":
|
| 147 |
+
next_abs = first_quran_idx + i + 1
|
| 148 |
+
if next_abs < len(phoneme_texts) and phoneme_texts[next_abs]:
|
| 149 |
+
resp_name, resp_conf = detect_transition_segment(
|
| 150 |
+
phoneme_texts[next_abs], allowed={"Tahmeed"})
|
| 151 |
+
if resp_name:
|
| 152 |
+
merged_into[next_abs] = first_quran_idx + i
|
| 153 |
+
tahmeed_merge_skip = 1
|
| 154 |
+
print(f" [TAHMEED-MERGE] Next segment merged into Tahmeed")
|
| 155 |
+
|
| 156 |
+
continue
|
| 157 |
+
else:
|
| 158 |
+
# Exit transition mode, global reanchor
|
| 159 |
+
transition_mode = False
|
| 160 |
+
print(f" [TRANSITION-MODE] Exiting at segment {segment_idx}, running global reanchor...")
|
| 161 |
+
remaining_idx = first_quran_idx + i
|
| 162 |
+
remaining_texts = phoneme_texts[remaining_idx:]
|
| 163 |
+
if remaining_texts:
|
| 164 |
+
reanchor_surah, reanchor_ayah = find_anchor_by_voting(
|
| 165 |
+
remaining_texts, get_ngram_index(), ANCHOR_SEGMENTS,
|
| 166 |
+
)
|
| 167 |
+
if reanchor_surah > 0:
|
| 168 |
+
if reanchor_surah != detected_surah:
|
| 169 |
+
detected_surah = reanchor_surah
|
| 170 |
+
chapter_ref = get_chapter_reference(detected_surah)
|
| 171 |
+
pointer = verse_to_word_index(chapter_ref, reanchor_ayah)
|
| 172 |
+
transition_expected_pointer = pointer
|
| 173 |
+
print(f" [GLOBAL-REANCHOR] Jumped to Surah {detected_surah}, "
|
| 174 |
+
f"Ayah {reanchor_ayah}, word {pointer}")
|
| 175 |
+
consecutive_failures = 0
|
| 176 |
+
# Fall through to normal alignment below
|
| 177 |
+
|
| 178 |
alignment, timing = align_segment(asr_phonemes, chapter_ref, pointer, segment_idx)
|
| 179 |
num_segments += 1
|
| 180 |
|
|
|
|
| 186 |
|
| 187 |
# Chapter transition: pointer past end of chapter
|
| 188 |
if alignment is None and pointer >= chapter_ref.num_words:
|
|
|
|
| 189 |
remaining_phonemes = phoneme_texts[first_quran_idx + i:]
|
| 190 |
+
amin_consumed = 0
|
| 191 |
+
|
| 192 |
+
if chapter_ref.surah == 1:
|
| 193 |
+
# Check for Amin after Al-Fatiha before inter-chapter specials
|
| 194 |
+
amin_name, amin_conf = detect_transition_segment(
|
| 195 |
+
asr_phonemes, allowed={"Amin"})
|
| 196 |
+
if amin_name:
|
| 197 |
+
print(f" [AMIN] Detected after Surah 1 (conf={amin_conf:.2f})")
|
| 198 |
+
results.append((TRANSITION_TEXT["Amin"], amin_conf, "Amin"))
|
| 199 |
+
word_indices.append(None)
|
| 200 |
+
transition_skips += 1
|
| 201 |
+
amin_consumed = 1
|
| 202 |
+
# Re-slice remaining phonemes to start after Amin
|
| 203 |
+
remaining_phonemes = phoneme_texts[first_quran_idx + i + 1:]
|
| 204 |
+
|
| 205 |
inter_specials, num_consumed = detect_inter_chapter_specials(remaining_phonemes)
|
| 206 |
|
| 207 |
if chapter_ref.surah == 1:
|
|
|
|
| 209 |
print(f" [CHAPTER-END] Surah 1 complete at segment {segment_idx}, "
|
| 210 |
f"running global reanchor...")
|
| 211 |
|
| 212 |
+
# Use segments after Amin + specials for anchor voting
|
| 213 |
+
anchor_offset = first_quran_idx + i + amin_consumed + num_consumed
|
| 214 |
anchor_remaining = phoneme_texts[anchor_offset:]
|
| 215 |
|
| 216 |
reanchor_surah, reanchor_ayah = find_anchor_by_voting(
|
|
|
|
| 246 |
detected_surah = next_surah
|
| 247 |
consecutive_failures = 0
|
| 248 |
|
| 249 |
+
if amin_consumed > 0:
|
| 250 |
+
# Current segment was Amin (already appended above).
|
| 251 |
+
# Queue inter-chapter specials for subsequent segments.
|
| 252 |
+
has_basmala = any(s[2] in ("Basmala", "Isti'adha+Basmala") for s in inter_specials)
|
| 253 |
+
is_first_after_transition = not has_basmala
|
| 254 |
+
if num_consumed > 0:
|
| 255 |
+
pending_specials = list(inter_specials)
|
| 256 |
+
skip_count = num_consumed
|
| 257 |
+
else:
|
| 258 |
+
is_first_after_transition = True
|
| 259 |
+
continue
|
| 260 |
+
|
| 261 |
if num_consumed > 0:
|
| 262 |
has_basmala = any(s[2] in ("Basmala", "Isti'adha+Basmala") for s in inter_specials)
|
| 263 |
is_first_after_transition = not has_basmala
|
|
|
|
| 324 |
_check_transition_gap(alignment.start_word_idx)
|
| 325 |
segments_passed += 1
|
| 326 |
else:
|
| 327 |
+
# === Check for transition segment before retry tiers ===
|
| 328 |
+
trans_name, trans_conf = detect_transition_segment(asr_phonemes)
|
| 329 |
+
if trans_name:
|
| 330 |
+
print(f" [TRANSITION] Segment {segment_idx}: {trans_name} (conf={trans_conf:.2f})")
|
| 331 |
+
result = (TRANSITION_TEXT[trans_name], trans_conf, trans_name)
|
| 332 |
+
word_indices.append(None)
|
| 333 |
+
transition_skips += 1
|
| 334 |
+
transition_mode = True
|
| 335 |
+
|
| 336 |
+
# Tahmeed peek-ahead for merge
|
| 337 |
+
if trans_name == "Tahmeed":
|
| 338 |
+
next_abs = first_quran_idx + i + 1
|
| 339 |
+
if next_abs < len(phoneme_texts) and phoneme_texts[next_abs]:
|
| 340 |
+
resp_name, resp_conf = detect_transition_segment(
|
| 341 |
+
phoneme_texts[next_abs], allowed={"Tahmeed"})
|
| 342 |
+
if resp_name:
|
| 343 |
+
merged_into[next_abs] = first_quran_idx + i
|
| 344 |
+
tahmeed_merge_skip = 1
|
| 345 |
+
print(f" [TAHMEED-MERGE] Next segment merged into Tahmeed")
|
| 346 |
+
|
| 347 |
+
results.append(result)
|
| 348 |
+
continue
|
| 349 |
+
|
| 350 |
# === Graduated retry ===
|
| 351 |
# Tier 1: expanded window, same threshold
|
| 352 |
tier1_attempts += 1
|
|
|
|
| 491 |
"segments_attempted": segments_attempted,
|
| 492 |
"segments_passed": segments_passed,
|
| 493 |
"special_merges": special_merges,
|
| 494 |
+
"transition_skips": transition_skips,
|
| 495 |
}
|
| 496 |
else:
|
| 497 |
profiling = {
|
|
|
|
| 506 |
"segments_attempted": segments_attempted,
|
| 507 |
"segments_passed": segments_passed,
|
| 508 |
"special_merges": special_merges,
|
| 509 |
+
"transition_skips": transition_skips,
|
| 510 |
}
|
| 511 |
|
| 512 |
+
return results, profiling, gap_segments, merged_into
|
src/alignment/special_segments.py
CHANGED
|
@@ -16,7 +16,7 @@ from typing import List, Tuple, Optional
|
|
| 16 |
# Constants
|
| 17 |
# =============================================================================
|
| 18 |
|
| 19 |
-
from config import MAX_SPECIAL_EDIT_DISTANCE
|
| 20 |
|
| 21 |
# Special phoneme sequences
|
| 22 |
SPECIAL_PHONEMES = {
|
|
@@ -40,6 +40,66 @@ SPECIAL_TEXT = {
|
|
| 40 |
"Basmala": "بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيم",
|
| 41 |
}
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
# =============================================================================
|
| 45 |
# Levenshtein Distance
|
|
@@ -141,8 +201,25 @@ def detect_special_segments(
|
|
| 141 |
|
| 142 |
special_results: List[Tuple[str, float, str]] = []
|
| 143 |
|
| 144 |
-
#
|
|
|
|
|
|
|
|
|
|
| 145 |
seg0_phonemes = phoneme_texts[0] if phoneme_texts[0] else []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
# ==========================================================================
|
| 148 |
# 1. Try COMBINED (Isti'adha + Basmala in one segment)
|
|
@@ -152,82 +229,88 @@ def detect_special_segments(
|
|
| 152 |
if combined_dist <= MAX_SPECIAL_EDIT_DISTANCE:
|
| 153 |
print(f"[SPECIAL] Combined Isti'adha+Basmala detected (dist={combined_dist:.2f})")
|
| 154 |
|
| 155 |
-
# Split
|
| 156 |
-
seg = vad_segments[
|
| 157 |
-
audio = segment_audios[
|
| 158 |
mid_time = (seg.start_time + seg.end_time) / 2.0
|
| 159 |
mid_sample = max(1, len(audio) // 2)
|
| 160 |
|
| 161 |
-
#
|
| 162 |
-
new_vads = [
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
]
|
| 170 |
|
| 171 |
# Add remaining segments with reindexed segment_idx
|
| 172 |
-
for
|
| 173 |
new_vads.append(VadSegment(
|
| 174 |
start_time=vs.start_time,
|
| 175 |
end_time=vs.end_time,
|
| 176 |
-
segment_idx=
|
| 177 |
))
|
| 178 |
-
new_audios.extend(segment_audios[1:])
|
| 179 |
|
| 180 |
# Special results for both (confidence = 1 - distance)
|
| 181 |
confidence = 1.0 - combined_dist
|
| 182 |
-
special_results
|
| 183 |
(SPECIAL_TEXT["Isti'adha"], confidence, "Isti'adha"),
|
| 184 |
(SPECIAL_TEXT["Basmala"], confidence, "Basmala"),
|
| 185 |
-
]
|
| 186 |
|
| 187 |
-
return new_vads, new_audios, special_results, 2
|
| 188 |
|
| 189 |
# ==========================================================================
|
| 190 |
-
# 2. Try Isti'adha on
|
| 191 |
# ==========================================================================
|
| 192 |
istiadha_dist = phoneme_edit_distance(seg0_phonemes, SPECIAL_PHONEMES["Isti'adha"])
|
| 193 |
|
| 194 |
if istiadha_dist <= MAX_SPECIAL_EDIT_DISTANCE:
|
| 195 |
-
print(f"[SPECIAL] Isti'adha detected on segment
|
| 196 |
special_results.append(
|
| 197 |
(SPECIAL_TEXT["Isti'adha"], 1.0 - istiadha_dist, "Isti'adha")
|
| 198 |
)
|
| 199 |
|
| 200 |
-
# Try Basmala on
|
| 201 |
-
|
| 202 |
-
|
|
|
|
| 203 |
basmala_dist = phoneme_edit_distance(seg1_phonemes, SPECIAL_PHONEMES["Basmala"])
|
| 204 |
|
| 205 |
if basmala_dist <= MAX_SPECIAL_EDIT_DISTANCE:
|
| 206 |
-
print(f"[SPECIAL] Basmala detected on segment
|
| 207 |
special_results.append(
|
| 208 |
(SPECIAL_TEXT["Basmala"], 1.0 - basmala_dist, "Basmala")
|
| 209 |
)
|
| 210 |
-
return vad_segments, segment_audios, special_results, 2
|
| 211 |
else:
|
| 212 |
-
print(f"[SPECIAL] No Basmala on segment
|
| 213 |
|
| 214 |
-
return vad_segments, segment_audios, special_results, 1
|
| 215 |
|
| 216 |
# ==========================================================================
|
| 217 |
-
# 3. Try Basmala on
|
| 218 |
# ==========================================================================
|
| 219 |
basmala_dist = phoneme_edit_distance(seg0_phonemes, SPECIAL_PHONEMES["Basmala"])
|
| 220 |
|
| 221 |
if basmala_dist <= MAX_SPECIAL_EDIT_DISTANCE:
|
| 222 |
-
print(f"[SPECIAL] Basmala detected on segment
|
| 223 |
special_results.append(
|
| 224 |
(SPECIAL_TEXT["Basmala"], 1.0 - basmala_dist, "Basmala")
|
| 225 |
)
|
| 226 |
-
return vad_segments, segment_audios, special_results, 1
|
| 227 |
|
| 228 |
# ==========================================================================
|
| 229 |
-
# 4. No specials detected
|
| 230 |
# ==========================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
print(f"[SPECIAL] No special segments detected "
|
| 232 |
f"(istiadha={istiadha_dist:.2f}, basmala={basmala_dist:.2f})")
|
| 233 |
|
|
@@ -293,3 +376,57 @@ def detect_inter_chapter_specials(
|
|
| 293 |
print(f"[INTER-CHAPTER] No special segments detected "
|
| 294 |
f"(istiadha={istiadha_dist:.2f}, basmala={basmala_dist:.2f})")
|
| 295 |
return [], 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
# Constants
|
| 17 |
# =============================================================================
|
| 18 |
|
| 19 |
+
from config import MAX_SPECIAL_EDIT_DISTANCE, MAX_TRANSITION_EDIT_DISTANCE
|
| 20 |
|
| 21 |
# Special phoneme sequences
|
| 22 |
SPECIAL_PHONEMES = {
|
|
|
|
| 40 |
"Basmala": "بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيم",
|
| 41 |
}
|
| 42 |
|
| 43 |
+
# Transition phoneme sequences (non-Quranic phrases within recitations)
|
| 44 |
+
TRANSITION_PHONEMES = {
|
| 45 |
+
# آمِين — not in Quran, manually constructed (5 phonemes)
|
| 46 |
+
"Amin": ["ʔ", "a:", "m", "i:", "n"],
|
| 47 |
+
|
| 48 |
+
# اللَّهُ أَكْبَر — standalone, hamza wasl pronounced (12 phonemes)
|
| 49 |
+
# Heavy lam (lˤlˤ) because start of utterance → fatha context
|
| 50 |
+
"Takbir": [
|
| 51 |
+
"ʔ", "a", "lˤlˤ", "aˤ:", "h", "u",
|
| 52 |
+
"ʔ", "a", "k", "b", "a", "rˤ",
|
| 53 |
+
],
|
| 54 |
+
|
| 55 |
+
# اللَّهُ أَكْبَر اللَّهُ أَكْبَر — repeated twice in one segment (24 phonemes)
|
| 56 |
+
# Some reciters say Takbir twice. Internal comparison only — display text stays single.
|
| 57 |
+
"Takbir_double": [
|
| 58 |
+
"ʔ", "a", "lˤlˤ", "aˤ:", "h", "u",
|
| 59 |
+
"ʔ", "a", "k", "b", "a", "rˤ",
|
| 60 |
+
"ʔ", "a", "lˤlˤ", "aˤ:", "h", "u",
|
| 61 |
+
"ʔ", "a", "k", "b", "a", "rˤ",
|
| 62 |
+
],
|
| 63 |
+
|
| 64 |
+
# سَمِعَ اللَّهُ لِمَنْ حَمِدَه — waqf on final word (22 phonemes)
|
| 65 |
+
# Heavy lam after fatha of سَمِعَ; izhar noon before ح (throat letter)
|
| 66 |
+
"Tahmeed": [
|
| 67 |
+
"s", "a", "m", "i", "ʕ", "a",
|
| 68 |
+
"lˤlˤ", "aˤ:", "h", "u",
|
| 69 |
+
"l", "i", "m", "a", "n",
|
| 70 |
+
"ħ", "a", "m", "i", "d", "a", "h",
|
| 71 |
+
],
|
| 72 |
+
|
| 73 |
+
# سَمِعَ اللَّهُ لِمَنْ حَمِدَهُ رَبَّنَا وَلَكَ الْحَمْد — both in one segment (39 phonemes)
|
| 74 |
+
# Connected speech: هُ keeps damma (no waqf mid-phrase), waqf on الحمد
|
| 75 |
+
"Tahmeed_combined": [
|
| 76 |
+
"s", "a", "m", "i", "ʕ", "a",
|
| 77 |
+
"lˤlˤ", "aˤ:", "h", "u",
|
| 78 |
+
"l", "i", "m", "a", "n",
|
| 79 |
+
"ħ", "a", "m", "i", "d", "a", "h", "u",
|
| 80 |
+
"rˤ", "aˤ", "bb", "a", "n", "a:",
|
| 81 |
+
"w", "a", "l", "a", "k", "a",
|
| 82 |
+
"l", "ħ", "a", "m", "d",
|
| 83 |
+
],
|
| 84 |
+
|
| 85 |
+
# رَبَّنَا وَلَكَ الْحَمْد — response only, for merge detection (17 phonemes)
|
| 86 |
+
# Heavy ra with emphatic fatha; ح is lunar so lam of ال is pronounced
|
| 87 |
+
"Tahmeed_response": [
|
| 88 |
+
"rˤ", "aˤ", "bb", "a", "n", "a:",
|
| 89 |
+
"w", "a", "l", "a", "k", "a",
|
| 90 |
+
"l", "ħ", "a", "m", "d",
|
| 91 |
+
],
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
TRANSITION_TEXT = {
|
| 95 |
+
"Amin": "آمِين",
|
| 96 |
+
"Takbir": "اللَّهُ أَكْبَر",
|
| 97 |
+
"Tahmeed": "سَمِعَ اللَّهُ لِمَنْ حَمِدَه",
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
# All special segment reference names (for unified rendering)
|
| 101 |
+
ALL_SPECIAL_REFS = {"Basmala", "Isti'adha", "Isti'adha+Basmala", "Amin", "Takbir", "Tahmeed"}
|
| 102 |
+
|
| 103 |
|
| 104 |
# =============================================================================
|
| 105 |
# Levenshtein Distance
|
|
|
|
| 201 |
|
| 202 |
special_results: List[Tuple[str, float, str]] = []
|
| 203 |
|
| 204 |
+
# ==========================================================================
|
| 205 |
+
# 0. Check segment 0 for Takbir (recitation opener before Isti'adha/Basmala)
|
| 206 |
+
# ==========================================================================
|
| 207 |
+
takbir_offset = 0
|
| 208 |
seg0_phonemes = phoneme_texts[0] if phoneme_texts[0] else []
|
| 209 |
+
takbir_name, takbir_conf = detect_transition_segment(seg0_phonemes, allowed={"Takbir"})
|
| 210 |
+
if takbir_name:
|
| 211 |
+
print(f"[SPECIAL] Takbir detected on segment 0 (conf={takbir_conf:.2f})")
|
| 212 |
+
special_results.append((TRANSITION_TEXT["Takbir"], takbir_conf, "Takbir"))
|
| 213 |
+
takbir_offset = 1
|
| 214 |
+
# Re-point to the next segment for Isti'adha/Basmala detection
|
| 215 |
+
if len(phoneme_texts) > 1:
|
| 216 |
+
seg0_phonemes = phoneme_texts[1] if phoneme_texts[1] else []
|
| 217 |
+
else:
|
| 218 |
+
return vad_segments, segment_audios, special_results, takbir_offset
|
| 219 |
+
|
| 220 |
+
# seg0_phonemes now points to the first non-Takbir segment
|
| 221 |
+
# (segment 0 if no Takbir, segment 1 if Takbir detected)
|
| 222 |
+
check_idx = takbir_offset # Index into phoneme_texts for Isti'adha/Basmala detection
|
| 223 |
|
| 224 |
# ==========================================================================
|
| 225 |
# 1. Try COMBINED (Isti'adha + Basmala in one segment)
|
|
|
|
| 229 |
if combined_dist <= MAX_SPECIAL_EDIT_DISTANCE:
|
| 230 |
print(f"[SPECIAL] Combined Isti'adha+Basmala detected (dist={combined_dist:.2f})")
|
| 231 |
|
| 232 |
+
# Split the combined segment by midpoint
|
| 233 |
+
seg = vad_segments[check_idx]
|
| 234 |
+
audio = segment_audios[check_idx]
|
| 235 |
mid_time = (seg.start_time + seg.end_time) / 2.0
|
| 236 |
mid_sample = max(1, len(audio) // 2)
|
| 237 |
|
| 238 |
+
# Rebuild vad/audio lists: keep segments before check_idx, split, then rest
|
| 239 |
+
new_vads = list(vad_segments[:check_idx])
|
| 240 |
+
new_audios = list(segment_audios[:check_idx])
|
| 241 |
+
|
| 242 |
+
split_start_idx = len(new_vads)
|
| 243 |
+
new_vads.append(VadSegment(start_time=seg.start_time, end_time=mid_time, segment_idx=split_start_idx))
|
| 244 |
+
new_vads.append(VadSegment(start_time=mid_time, end_time=seg.end_time, segment_idx=split_start_idx + 1))
|
| 245 |
+
new_audios.append(audio[:mid_sample])
|
| 246 |
+
new_audios.append(audio[mid_sample:])
|
| 247 |
|
| 248 |
# Add remaining segments with reindexed segment_idx
|
| 249 |
+
for ii, vs in enumerate(vad_segments[check_idx + 1:], start=split_start_idx + 2):
|
| 250 |
new_vads.append(VadSegment(
|
| 251 |
start_time=vs.start_time,
|
| 252 |
end_time=vs.end_time,
|
| 253 |
+
segment_idx=ii
|
| 254 |
))
|
| 255 |
+
new_audios.extend(segment_audios[check_idx + 1:])
|
| 256 |
|
| 257 |
# Special results for both (confidence = 1 - distance)
|
| 258 |
confidence = 1.0 - combined_dist
|
| 259 |
+
special_results.extend([
|
| 260 |
(SPECIAL_TEXT["Isti'adha"], confidence, "Isti'adha"),
|
| 261 |
(SPECIAL_TEXT["Basmala"], confidence, "Basmala"),
|
| 262 |
+
])
|
| 263 |
|
| 264 |
+
return new_vads, new_audios, special_results, takbir_offset + 2
|
| 265 |
|
| 266 |
# ==========================================================================
|
| 267 |
+
# 2. Try Isti'adha on the check segment
|
| 268 |
# ==========================================================================
|
| 269 |
istiadha_dist = phoneme_edit_distance(seg0_phonemes, SPECIAL_PHONEMES["Isti'adha"])
|
| 270 |
|
| 271 |
if istiadha_dist <= MAX_SPECIAL_EDIT_DISTANCE:
|
| 272 |
+
print(f"[SPECIAL] Isti'adha detected on segment {check_idx} (dist={istiadha_dist:.2f})")
|
| 273 |
special_results.append(
|
| 274 |
(SPECIAL_TEXT["Isti'adha"], 1.0 - istiadha_dist, "Isti'adha")
|
| 275 |
)
|
| 276 |
|
| 277 |
+
# Try Basmala on the next segment
|
| 278 |
+
next_idx = check_idx + 1
|
| 279 |
+
if next_idx < len(phoneme_texts) and phoneme_texts[next_idx]:
|
| 280 |
+
seg1_phonemes = phoneme_texts[next_idx]
|
| 281 |
basmala_dist = phoneme_edit_distance(seg1_phonemes, SPECIAL_PHONEMES["Basmala"])
|
| 282 |
|
| 283 |
if basmala_dist <= MAX_SPECIAL_EDIT_DISTANCE:
|
| 284 |
+
print(f"[SPECIAL] Basmala detected on segment {next_idx} (dist={basmala_dist:.2f})")
|
| 285 |
special_results.append(
|
| 286 |
(SPECIAL_TEXT["Basmala"], 1.0 - basmala_dist, "Basmala")
|
| 287 |
)
|
| 288 |
+
return vad_segments, segment_audios, special_results, takbir_offset + 2
|
| 289 |
else:
|
| 290 |
+
print(f"[SPECIAL] No Basmala on segment {next_idx} (dist={basmala_dist:.2f})")
|
| 291 |
|
| 292 |
+
return vad_segments, segment_audios, special_results, takbir_offset + 1
|
| 293 |
|
| 294 |
# ==========================================================================
|
| 295 |
+
# 3. Try Basmala on the check segment
|
| 296 |
# ==========================================================================
|
| 297 |
basmala_dist = phoneme_edit_distance(seg0_phonemes, SPECIAL_PHONEMES["Basmala"])
|
| 298 |
|
| 299 |
if basmala_dist <= MAX_SPECIAL_EDIT_DISTANCE:
|
| 300 |
+
print(f"[SPECIAL] Basmala detected on segment {check_idx} (dist={basmala_dist:.2f})")
|
| 301 |
special_results.append(
|
| 302 |
(SPECIAL_TEXT["Basmala"], 1.0 - basmala_dist, "Basmala")
|
| 303 |
)
|
| 304 |
+
return vad_segments, segment_audios, special_results, takbir_offset + 1
|
| 305 |
|
| 306 |
# ==========================================================================
|
| 307 |
+
# 4. No specials detected (beyond Takbir if any)
|
| 308 |
# ==========================================================================
|
| 309 |
+
if takbir_offset > 0:
|
| 310 |
+
print(f"[SPECIAL] Only Takbir detected, no Isti'adha/Basmala "
|
| 311 |
+
f"(istiadha={istiadha_dist:.2f}, basmala={basmala_dist:.2f})")
|
| 312 |
+
return vad_segments, segment_audios, special_results, takbir_offset
|
| 313 |
+
|
| 314 |
print(f"[SPECIAL] No special segments detected "
|
| 315 |
f"(istiadha={istiadha_dist:.2f}, basmala={basmala_dist:.2f})")
|
| 316 |
|
|
|
|
| 376 |
print(f"[INTER-CHAPTER] No special segments detected "
|
| 377 |
f"(istiadha={istiadha_dist:.2f}, basmala={basmala_dist:.2f})")
|
| 378 |
return [], 0
|
| 379 |
+
|
| 380 |
+
|
| 381 |
+
# =============================================================================
|
| 382 |
+
# Transition Segment Detection
|
| 383 |
+
# =============================================================================
|
| 384 |
+
|
| 385 |
+
# Mapping from variant names to their base/display name
|
| 386 |
+
_TRANSITION_BASE_NAMES = {
|
| 387 |
+
"Takbir_double": "Takbir",
|
| 388 |
+
"Tahmeed_combined": "Tahmeed",
|
| 389 |
+
"Tahmeed_response": "Tahmeed",
|
| 390 |
+
}
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
def detect_transition_segment(
    asr_phonemes: List[str],
    allowed: Optional[set] = None,
) -> Tuple[Optional[str], float]:
    """Detect a non-Quranic transition phrase in one ASR segment.

    Every entry in TRANSITION_PHONEMES is scored by normalized phoneme edit
    distance against the segment; the single closest entry wins. Variant keys
    (e.g. ``Takbir_double``) are collapsed to their base name via
    ``_TRANSITION_BASE_NAMES`` before filtering and before being returned, so
    variants influence matching only, never display.

    Args:
        asr_phonemes: ASR output phoneme sequence for one segment.
        allowed: Optional set of base names to restrict detection to
            (e.g. ``{"Amin"}`` to only check Amin).

    Returns:
        ``(name, confidence)`` — the base transition name (or ``None`` when
        nothing scores under ``MAX_TRANSITION_EDIT_DISTANCE``) and
        ``1 - normalized_edit_distance`` as confidence (``0.0`` on no match).
    """
    if not asr_phonemes:
        return None, 0.0

    # Score every eligible reference; keep (distance, base_name) pairs.
    scored = []
    for key, ref_phonemes in TRANSITION_PHONEMES.items():
        base = _TRANSITION_BASE_NAMES.get(key, key)
        if allowed is not None and base not in allowed:
            continue
        scored.append((phoneme_edit_distance(asr_phonemes, ref_phonemes), base))

    if scored:
        # min() keeps the first entry on ties, matching a strict `<` scan.
        dist, name = min(scored, key=lambda pair: pair[0])
        if dist <= MAX_TRANSITION_EDIT_DISTANCE:
            return name, 1.0 - dist

    return None, 0.0
|
src/core/segment_types.py
CHANGED
|
@@ -67,6 +67,7 @@ class ProfilingData:
|
|
| 67 |
segments_attempted: int = 0
|
| 68 |
segments_passed: int = 0
|
| 69 |
special_merges: int = 0
|
|
|
|
| 70 |
# Result building profiling
|
| 71 |
result_build_time: float = 0.0 # Total result building time
|
| 72 |
result_audio_encode_time: float = 0.0 # Audio-to-data-URL encoding
|
|
@@ -140,6 +141,7 @@ class ProfilingData:
|
|
| 140 |
f" Tier 2 Retries: {self.tier2_passed}/{self.tier2_attempts} passed segments: {t2_segs}",
|
| 141 |
f" Reanchors (consec failures): {self.consec_reanchors}",
|
| 142 |
f" Special Merges: {self.special_merges}",
|
|
|
|
| 143 |
"-" * 60,
|
| 144 |
]
|
| 145 |
profiled_sum = (self.resample_time + self.vad_wall_time + self.asr_time
|
|
|
|
| 67 |
segments_attempted: int = 0
|
| 68 |
segments_passed: int = 0
|
| 69 |
special_merges: int = 0
|
| 70 |
+
transition_skips: int = 0
|
| 71 |
# Result building profiling
|
| 72 |
result_build_time: float = 0.0 # Total result building time
|
| 73 |
result_audio_encode_time: float = 0.0 # Audio-to-data-URL encoding
|
|
|
|
| 141 |
f" Tier 2 Retries: {self.tier2_passed}/{self.tier2_attempts} passed segments: {t2_segs}",
|
| 142 |
f" Reanchors (consec failures): {self.consec_reanchors}",
|
| 143 |
f" Special Merges: {self.special_merges}",
|
| 144 |
+
f" Transition Skips: {self.transition_skips}",
|
| 145 |
"-" * 60,
|
| 146 |
]
|
| 147 |
profiled_sum = (self.resample_time + self.vad_wall_time + self.asr_time
|
src/core/zero_gpu.py
CHANGED
|
@@ -5,7 +5,6 @@ local or non-ZeroGPU environments.
|
|
| 5 |
|
| 6 |
import os
|
| 7 |
import re
|
| 8 |
-
import time as _time
|
| 9 |
import threading
|
| 10 |
from typing import Callable, TypeVar
|
| 11 |
from functools import wraps
|
|
@@ -18,27 +17,6 @@ ZERO_GPU_AVAILABLE = False
|
|
| 18 |
# Per-thread (per-request) GPU state so concurrent requests don't interfere
|
| 19 |
_request_state = threading.local()
|
| 20 |
|
| 21 |
-
# ---------------------------------------------------------------------------
|
| 22 |
-
# Process-global CUDA health tracking.
|
| 23 |
-
# Unlike _request_state (thread-local, per-request), this affects ALL threads.
|
| 24 |
-
# When CUDA errors occur we mark it unhealthy so no request attempts GPU
|
| 25 |
-
# until the cooldown expires — preventing further poisoning of torch's
|
| 26 |
-
# internal CUDA state.
|
| 27 |
-
# ---------------------------------------------------------------------------
|
| 28 |
-
_cuda_health_lock = threading.Lock()
|
| 29 |
-
_cuda_healthy = True
|
| 30 |
-
_cuda_unhealthy_since = 0.0 # timestamp of first CUDA failure
|
| 31 |
-
_consecutive_cuda_failures = 0 # track persistent failures across cooldowns
|
| 32 |
-
|
| 33 |
-
# ---------------------------------------------------------------------------
|
| 34 |
-
# SDK worker scheduling failure tracking (separate from CUDA hardware errors).
|
| 35 |
-
# Worker errors are transient — the assigned GPU UUID isn't available when
|
| 36 |
-
# worker_init runs. Retrying gets a different GPU via a new schedule() call.
|
| 37 |
-
# ---------------------------------------------------------------------------
|
| 38 |
-
_sdk_worker_lock = threading.Lock()
|
| 39 |
-
_sdk_worker_consecutive_failures = 0
|
| 40 |
-
_sdk_worker_cooldown_until = 0.0 # timestamp when cooldown expires
|
| 41 |
-
|
| 42 |
# ---------------------------------------------------------------------------
|
| 43 |
# Shared RLock for model device transitions AND inference.
|
| 44 |
# RLock because ensure_models_on_gpu() -> move_phoneme_asr_to_gpu() is a
|
|
@@ -56,17 +34,6 @@ _active_gpu_leases = 0
|
|
| 56 |
_models_stale = False # Set True at lease end; drained at next lease start
|
| 57 |
|
| 58 |
|
| 59 |
-
# CUDA error patterns that should trigger CPU fallback (not re-raise).
|
| 60 |
-
# These indicate hardware/driver issues where retrying GPU won't help.
|
| 61 |
-
_CUDA_ERROR_PATTERNS = (
|
| 62 |
-
"no cuda gpus are available",
|
| 63 |
-
"cuda must not be initialized",
|
| 64 |
-
"cuda error", "cuda out of memory",
|
| 65 |
-
"cuda driver", "cuda runtime",
|
| 66 |
-
"device-side assert", "cublas", "cudnn error", "nccl",
|
| 67 |
-
"gpu task aborted", # ZeroGPU SDK wraps CUDA errors with this message
|
| 68 |
-
)
|
| 69 |
-
|
| 70 |
try:
|
| 71 |
import spaces # type: ignore
|
| 72 |
|
|
@@ -106,11 +73,6 @@ def _exit_gpu_lease():
|
|
| 106 |
_active_gpu_leases = max(0, _active_gpu_leases - 1)
|
| 107 |
|
| 108 |
|
| 109 |
-
def is_gpu_lease_active():
|
| 110 |
-
"""Check if any thread currently holds a GPU lease."""
|
| 111 |
-
return _active_gpu_leases > 0
|
| 112 |
-
|
| 113 |
-
|
| 114 |
# =========================================================================
|
| 115 |
# Per-thread state helpers
|
| 116 |
# =========================================================================
|
|
@@ -147,128 +109,6 @@ def force_cpu_mode():
|
|
| 147 |
_request_state.user_forced_cpu = True
|
| 148 |
|
| 149 |
|
| 150 |
-
# =========================================================================
|
| 151 |
-
# Process-level CUDA health
|
| 152 |
-
# =========================================================================
|
| 153 |
-
|
| 154 |
-
def _is_cuda_healthy() -> bool:
|
| 155 |
-
"""Check if CUDA is considered healthy at the process level.
|
| 156 |
-
|
| 157 |
-
If unhealthy, checks whether the cooldown has expired.
|
| 158 |
-
If expired, allows exactly one retry by marking healthy again.
|
| 159 |
-
"""
|
| 160 |
-
global _cuda_healthy
|
| 161 |
-
|
| 162 |
-
if _cuda_healthy:
|
| 163 |
-
return True
|
| 164 |
-
|
| 165 |
-
from config import CUDA_COOLDOWN_SECONDS
|
| 166 |
-
with _cuda_health_lock:
|
| 167 |
-
if _cuda_healthy:
|
| 168 |
-
return True # Another thread already recovered
|
| 169 |
-
elapsed = _time.time() - _cuda_unhealthy_since
|
| 170 |
-
if elapsed >= CUDA_COOLDOWN_SECONDS:
|
| 171 |
-
print(f"[CUDA HEALTH] Cooldown expired ({elapsed:.0f}s), allowing GPU retry")
|
| 172 |
-
_try_reset_cuda_state()
|
| 173 |
-
_cuda_healthy = True
|
| 174 |
-
return True
|
| 175 |
-
return False
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
def _mark_cuda_unhealthy():
|
| 179 |
-
"""Mark CUDA as unhealthy process-wide after a CUDA error.
|
| 180 |
-
|
| 181 |
-
All subsequent requests will skip GPU until cooldown expires.
|
| 182 |
-
After MAX_CUDA_FAILURES consecutive failures, restarts the process
|
| 183 |
-
(HF Spaces will restart the container automatically).
|
| 184 |
-
"""
|
| 185 |
-
global _cuda_healthy, _cuda_unhealthy_since, _consecutive_cuda_failures
|
| 186 |
-
with _cuda_health_lock:
|
| 187 |
-
_consecutive_cuda_failures += 1
|
| 188 |
-
if _cuda_healthy:
|
| 189 |
-
_cuda_unhealthy_since = _time.time()
|
| 190 |
-
_cuda_healthy = False
|
| 191 |
-
print(f"[CUDA HEALTH] Marked CUDA unhealthy (consecutive failures: {_consecutive_cuda_failures})")
|
| 192 |
-
|
| 193 |
-
from config import MAX_CUDA_FAILURES
|
| 194 |
-
if _consecutive_cuda_failures >= MAX_CUDA_FAILURES:
|
| 195 |
-
print(f"[CUDA HEALTH] {_consecutive_cuda_failures} consecutive CUDA failures — "
|
| 196 |
-
"process permanently poisoned, forcing restart")
|
| 197 |
-
os._exit(1)
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
def _mark_cuda_success():
|
| 201 |
-
"""Reset consecutive failure counter after a successful GPU operation."""
|
| 202 |
-
global _consecutive_cuda_failures
|
| 203 |
-
with _cuda_health_lock:
|
| 204 |
-
if _consecutive_cuda_failures > 0:
|
| 205 |
-
print(f"[CUDA HEALTH] GPU succeeded, resetting failure counter (was {_consecutive_cuda_failures})")
|
| 206 |
-
_consecutive_cuda_failures = 0
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
def _try_reset_cuda_state():
|
| 210 |
-
"""Reset torch.cuda internal state so a fresh GPU attempt can re-init cleanly.
|
| 211 |
-
|
| 212 |
-
After CUDA poisoning, torch.cuda._initialized remains True even though
|
| 213 |
-
the underlying CUDA context is dead. Resetting it allows the next
|
| 214 |
-
torch.cuda call to re-initialize from scratch.
|
| 215 |
-
"""
|
| 216 |
-
try:
|
| 217 |
-
import torch.cuda as _cuda
|
| 218 |
-
if getattr(_cuda, '_initialized', False):
|
| 219 |
-
_cuda._initialized = False
|
| 220 |
-
print("[CUDA HEALTH] Reset torch.cuda._initialized")
|
| 221 |
-
if hasattr(_cuda, '_queued_calls'):
|
| 222 |
-
_cuda._queued_calls.clear()
|
| 223 |
-
except Exception as e:
|
| 224 |
-
print(f"[CUDA HEALTH] CUDA state reset failed (non-fatal): {e}")
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
# =========================================================================
|
| 228 |
-
# SDK worker scheduling health
|
| 229 |
-
# =========================================================================
|
| 230 |
-
|
| 231 |
-
def _record_sdk_worker_failure():
|
| 232 |
-
"""Record a failed SDK worker retry. After threshold → enter cooldown."""
|
| 233 |
-
global _sdk_worker_consecutive_failures, _sdk_worker_cooldown_until
|
| 234 |
-
from config import SDK_WORKER_COOLDOWN_SECONDS, SDK_WORKER_FAILURE_THRESHOLD
|
| 235 |
-
with _sdk_worker_lock:
|
| 236 |
-
_sdk_worker_consecutive_failures += 1
|
| 237 |
-
if _sdk_worker_consecutive_failures >= SDK_WORKER_FAILURE_THRESHOLD:
|
| 238 |
-
_sdk_worker_cooldown_until = _time.time() + SDK_WORKER_COOLDOWN_SECONDS
|
| 239 |
-
print(f"[GPU] SDK worker: {_sdk_worker_consecutive_failures} consecutive failures, "
|
| 240 |
-
f"cooldown for {SDK_WORKER_COOLDOWN_SECONDS}s")
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
def _reset_sdk_worker_failures():
|
| 244 |
-
"""Reset SDK worker failure counter after a successful GPU operation."""
|
| 245 |
-
global _sdk_worker_consecutive_failures, _sdk_worker_cooldown_until
|
| 246 |
-
with _sdk_worker_lock:
|
| 247 |
-
if _sdk_worker_consecutive_failures > 0:
|
| 248 |
-
print(f"[GPU] SDK worker: GPU succeeded, resetting failure counter "
|
| 249 |
-
f"(was {_sdk_worker_consecutive_failures})")
|
| 250 |
-
_sdk_worker_consecutive_failures = 0
|
| 251 |
-
_sdk_worker_cooldown_until = 0.0
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
def _is_sdk_worker_healthy() -> bool:
|
| 255 |
-
"""Check if SDK worker scheduling is healthy (not in cooldown).
|
| 256 |
-
|
| 257 |
-
Returns True if no cooldown active or cooldown has expired.
|
| 258 |
-
Auto-resets on expiry so the next attempt can retry GPU.
|
| 259 |
-
"""
|
| 260 |
-
global _sdk_worker_cooldown_until
|
| 261 |
-
with _sdk_worker_lock:
|
| 262 |
-
if _sdk_worker_cooldown_until <= 0.0:
|
| 263 |
-
return True
|
| 264 |
-
now = _time.time()
|
| 265 |
-
if now >= _sdk_worker_cooldown_until:
|
| 266 |
-
print("[GPU] SDK worker cooldown expired, allowing GPU retry")
|
| 267 |
-
_sdk_worker_cooldown_until = 0.0
|
| 268 |
-
return True
|
| 269 |
-
return False
|
| 270 |
-
|
| 271 |
-
|
| 272 |
# =========================================================================
|
| 273 |
# Model cleanup helpers
|
| 274 |
# =========================================================================
|
|
@@ -325,6 +165,11 @@ def gpu_with_fallback(duration=60):
|
|
| 325 |
The model_device_lock is held for the ENTIRE GPU lease (inference +
|
| 326 |
cleanup) to prevent concurrent threads from moving models mid-inference.
|
| 327 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 328 |
Usage:
|
| 329 |
@gpu_with_fallback(duration=60)
|
| 330 |
def my_gpu_func(data):
|
|
@@ -341,10 +186,7 @@ def gpu_with_fallback(duration=60):
|
|
| 341 |
with model_device_lock:
|
| 342 |
try:
|
| 343 |
_drain_stale_models()
|
| 344 |
-
|
| 345 |
-
_mark_cuda_success()
|
| 346 |
-
_reset_sdk_worker_failures()
|
| 347 |
-
return result
|
| 348 |
finally:
|
| 349 |
try:
|
| 350 |
_cleanup_after_gpu()
|
|
@@ -373,105 +215,34 @@ def gpu_with_fallback(duration=60):
|
|
| 373 |
print("[GPU] Quota exhausted, using CPU fallback")
|
| 374 |
return func(*args, **kwargs)
|
| 375 |
|
| 376 |
-
#
|
| 377 |
-
# further poisoning of torch's internal CUDA state
|
| 378 |
-
if not _is_cuda_healthy():
|
| 379 |
-
from config import CUDA_COOLDOWN_SECONDS
|
| 380 |
-
remaining = CUDA_COOLDOWN_SECONDS - (_time.time() - _cuda_unhealthy_since)
|
| 381 |
-
print(f"[CUDA HEALTH] CUDA unhealthy, skipping GPU (retry in {remaining:.0f}s)")
|
| 382 |
-
_request_state.gpu_quota_exhausted = True
|
| 383 |
-
try:
|
| 384 |
-
import gradio as gr
|
| 385 |
-
gr.Warning(f"GPU temporarily unavailable — using CPU. Retry in {max(1, int(remaining / 60))}m.")
|
| 386 |
-
except Exception:
|
| 387 |
-
pass
|
| 388 |
-
return func(*args, **kwargs)
|
| 389 |
-
|
| 390 |
-
# If SDK worker scheduling is in cooldown, skip GPU
|
| 391 |
-
if not _is_sdk_worker_healthy():
|
| 392 |
-
print("[GPU] SDK worker in cooldown, skipping GPU")
|
| 393 |
-
_request_state.gpu_quota_exhausted = True
|
| 394 |
-
try:
|
| 395 |
-
import gradio as gr
|
| 396 |
-
gr.Warning("GPU temporarily unavailable — using CPU (slower).")
|
| 397 |
-
except Exception:
|
| 398 |
-
pass
|
| 399 |
-
return func(*args, **kwargs)
|
| 400 |
-
|
| 401 |
-
# Try GPU first
|
| 402 |
try:
|
| 403 |
return gpu_func(*args, **kwargs)
|
| 404 |
except Exception as e:
|
| 405 |
-
|
| 406 |
-
|
|
|
|
|
|
|
| 407 |
is_quota_error = getattr(e, 'title', '') == "ZeroGPU quota exceeded"
|
| 408 |
if not is_quota_error:
|
| 409 |
-
|
| 410 |
-
is_quota_error = 'quota' in err_str and ('exceeded' in err_str or 'exhausted' in err_str)
|
| 411 |
|
| 412 |
if is_quota_error:
|
| 413 |
print(f"[GPU] Quota exceeded (user-level), falling back to CPU: {e}")
|
| 414 |
_request_state.gpu_quota_exhausted = True
|
| 415 |
-
|
| 416 |
-
match = re.search(r'Try again in (\d+:\d{2}:\d{2})', str(e))
|
| 417 |
if match:
|
| 418 |
_request_state.quota_reset_time = match.group(1)
|
| 419 |
-
# NOT setting process-global flag: quota is per-user,
|
| 420 |
-
# other users may still have quota available.
|
| 421 |
-
# Show immediate toast notification
|
| 422 |
try:
|
| 423 |
import gradio as gr
|
| 424 |
reset_time = get_quota_reset_time()
|
| 425 |
reset_msg = f" Resets in {reset_time}." if reset_time else ""
|
| 426 |
gr.Warning(f"GPU quota reached — switching to CPU (slower).{reset_msg}")
|
| 427 |
-
except Exception:
|
| 428 |
-
pass # Not in a Gradio context (e.g., CLI usage)
|
| 429 |
-
return func(*args, **kwargs)
|
| 430 |
-
|
| 431 |
-
# Check for CUDA hardware/driver errors (e.g. worker_init failure)
|
| 432 |
-
err_lower = str(e).lower()
|
| 433 |
-
is_cuda_error = any(p in err_lower for p in _CUDA_ERROR_PATTERNS)
|
| 434 |
-
|
| 435 |
-
# SDK wraps worker_init failures as gradio.Error(title="ZeroGPU worker error")
|
| 436 |
-
# with message = just the exception class name. Original CUDA message is lost.
|
| 437 |
-
is_sdk_worker_error = False
|
| 438 |
-
if not is_cuda_error:
|
| 439 |
-
err_title = getattr(e, 'title', '') or ''
|
| 440 |
-
is_sdk_worker_error = 'worker' in err_title.lower() and 'error' in err_title.lower()
|
| 441 |
-
|
| 442 |
-
if is_cuda_error:
|
| 443 |
-
print(f"[GPU] CUDA error, falling back to CPU: {e}")
|
| 444 |
-
_mark_cuda_unhealthy()
|
| 445 |
-
_request_state.gpu_quota_exhausted = True
|
| 446 |
-
try:
|
| 447 |
-
import gradio as gr
|
| 448 |
-
gr.Warning("GPU hardware error — switching to CPU (slower).")
|
| 449 |
-
except Exception:
|
| 450 |
-
pass
|
| 451 |
-
return func(*args, **kwargs)
|
| 452 |
-
|
| 453 |
-
if is_sdk_worker_error:
|
| 454 |
-
# worker_init failed — the assigned GPU UUID wasn't available.
|
| 455 |
-
# This is transient: retrying calls client.schedule() which
|
| 456 |
-
# assigns a different GPU. No sleep needed.
|
| 457 |
-
print(f"[GPU] SDK worker error, retrying GPU (new schedule)...")
|
| 458 |
-
try:
|
| 459 |
-
result = gpu_func(*args, **kwargs)
|
| 460 |
-
_mark_cuda_success()
|
| 461 |
-
_reset_sdk_worker_failures()
|
| 462 |
-
return result
|
| 463 |
-
except Exception as retry_e:
|
| 464 |
-
print(f"[GPU] GPU retry also failed: {retry_e}")
|
| 465 |
-
_record_sdk_worker_failure()
|
| 466 |
-
# Both attempts failed — fall through to CPU
|
| 467 |
-
_request_state.gpu_quota_exhausted = True
|
| 468 |
-
try:
|
| 469 |
-
import gradio as gr
|
| 470 |
-
gr.Warning("GPU temporarily unavailable — using CPU (slower).")
|
| 471 |
except Exception:
|
| 472 |
pass
|
| 473 |
return func(*args, **kwargs)
|
| 474 |
|
|
|
|
| 475 |
is_timeout = (
|
| 476 |
'timeout' in err_lower
|
| 477 |
or 'duration' in err_lower
|
|
@@ -481,19 +252,11 @@ def gpu_with_fallback(duration=60):
|
|
| 481 |
print(f"[GPU] Timeout error in {func.__name__}: {e}")
|
| 482 |
raise
|
| 483 |
|
| 484 |
-
#
|
| 485 |
-
#
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
print(f"[GPU] Unrecognized GPU error, falling back to CPU: {type(e).__name__}: {e}")
|
| 490 |
-
_request_state.gpu_quota_exhausted = True
|
| 491 |
-
try:
|
| 492 |
-
import gradio as gr
|
| 493 |
-
gr.Warning("GPU error — using CPU (slower).")
|
| 494 |
-
except Exception:
|
| 495 |
-
pass
|
| 496 |
-
return func(*args, **kwargs)
|
| 497 |
|
| 498 |
return wrapper
|
| 499 |
return decorator
|
|
|
|
| 5 |
|
| 6 |
import os
|
| 7 |
import re
|
|
|
|
| 8 |
import threading
|
| 9 |
from typing import Callable, TypeVar
|
| 10 |
from functools import wraps
|
|
|
|
| 17 |
# Per-thread (per-request) GPU state so concurrent requests don't interfere
|
| 18 |
_request_state = threading.local()
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
# ---------------------------------------------------------------------------
|
| 21 |
# Shared RLock for model device transitions AND inference.
|
| 22 |
# RLock because ensure_models_on_gpu() -> move_phoneme_asr_to_gpu() is a
|
|
|
|
| 34 |
_models_stale = False # Set True at lease end; drained at next lease start
|
| 35 |
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
try:
|
| 38 |
import spaces # type: ignore
|
| 39 |
|
|
|
|
| 73 |
_active_gpu_leases = max(0, _active_gpu_leases - 1)
|
| 74 |
|
| 75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
# =========================================================================
|
| 77 |
# Per-thread state helpers
|
| 78 |
# =========================================================================
|
|
|
|
| 109 |
_request_state.user_forced_cpu = True
|
| 110 |
|
| 111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
# =========================================================================
|
| 113 |
# Model cleanup helpers
|
| 114 |
# =========================================================================
|
|
|
|
| 165 |
The model_device_lock is held for the ENTIRE GPU lease (inference +
|
| 166 |
cleanup) to prevent concurrent threads from moving models mid-inference.
|
| 167 |
|
| 168 |
+
Error handling strategy:
|
| 169 |
+
- Quota exhaustion → CPU fallback (per-user, not process issue)
|
| 170 |
+
- Timeout → propagate to caller
|
| 171 |
+
- Any other GPU error → os._exit(1) for clean process restart
|
| 172 |
+
|
| 173 |
Usage:
|
| 174 |
@gpu_with_fallback(duration=60)
|
| 175 |
def my_gpu_func(data):
|
|
|
|
| 186 |
with model_device_lock:
|
| 187 |
try:
|
| 188 |
_drain_stale_models()
|
| 189 |
+
return func(*args, **kwargs)
|
|
|
|
|
|
|
|
|
|
| 190 |
finally:
|
| 191 |
try:
|
| 192 |
_cleanup_after_gpu()
|
|
|
|
| 215 |
print("[GPU] Quota exhausted, using CPU fallback")
|
| 216 |
return func(*args, **kwargs)
|
| 217 |
|
| 218 |
+
# Try GPU
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
try:
|
| 220 |
return gpu_func(*args, **kwargs)
|
| 221 |
except Exception as e:
|
| 222 |
+
err_str = str(e)
|
| 223 |
+
err_lower = err_str.lower()
|
| 224 |
+
|
| 225 |
+
# Quota exhaustion → CPU fallback (per-user, not process issue)
|
| 226 |
is_quota_error = getattr(e, 'title', '') == "ZeroGPU quota exceeded"
|
| 227 |
if not is_quota_error:
|
| 228 |
+
is_quota_error = 'quota' in err_lower and ('exceeded' in err_lower or 'exhausted' in err_lower)
|
|
|
|
| 229 |
|
| 230 |
if is_quota_error:
|
| 231 |
print(f"[GPU] Quota exceeded (user-level), falling back to CPU: {e}")
|
| 232 |
_request_state.gpu_quota_exhausted = True
|
| 233 |
+
match = re.search(r'Try again in (\d+:\d{2}:\d{2})', err_str)
|
|
|
|
| 234 |
if match:
|
| 235 |
_request_state.quota_reset_time = match.group(1)
|
|
|
|
|
|
|
|
|
|
| 236 |
try:
|
| 237 |
import gradio as gr
|
| 238 |
reset_time = get_quota_reset_time()
|
| 239 |
reset_msg = f" Resets in {reset_time}." if reset_time else ""
|
| 240 |
gr.Warning(f"GPU quota reached — switching to CPU (slower).{reset_msg}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
except Exception:
|
| 242 |
pass
|
| 243 |
return func(*args, **kwargs)
|
| 244 |
|
| 245 |
+
# Timeout → propagate to caller
|
| 246 |
is_timeout = (
|
| 247 |
'timeout' in err_lower
|
| 248 |
or 'duration' in err_lower
|
|
|
|
| 252 |
print(f"[GPU] Timeout error in {func.__name__}: {e}")
|
| 253 |
raise
|
| 254 |
|
| 255 |
+
# ANY other GPU error → process is poisoned, kill immediately.
|
| 256 |
+
# HF Spaces auto-restarts the container with a fresh process.
|
| 257 |
+
print(f"[GPU] Fatal GPU error: {type(e).__name__}: {e}")
|
| 258 |
+
print("[GPU] Restarting process to recover clean GPU state...")
|
| 259 |
+
os._exit(1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
|
| 261 |
return wrapper
|
| 262 |
return decorator
|
src/pipeline.py
CHANGED
|
@@ -174,8 +174,12 @@ def _run_post_vad_pipeline(
|
|
| 174 |
|
| 175 |
# If segments were split (combined Isti'adha+Basmala), pad phoneme_texts
|
| 176 |
# with empty placeholders so indices stay aligned.
|
|
|
|
|
|
|
|
|
|
| 177 |
if len(vad_segments) != len(phoneme_texts):
|
| 178 |
-
|
|
|
|
| 179 |
|
| 180 |
# Anchor detection via phoneme n-gram voting
|
| 181 |
progress(*progress_steps["anchor"])
|
|
@@ -206,7 +210,7 @@ def _run_post_vad_pipeline(
|
|
| 206 |
|
| 207 |
# Phoneme-based DP alignment
|
| 208 |
match_start = time.time()
|
| 209 |
-
match_results, match_profiling, gap_segments = run_phoneme_matching(
|
| 210 |
phoneme_texts,
|
| 211 |
surah,
|
| 212 |
first_quran_idx,
|
|
@@ -237,6 +241,7 @@ def _run_post_vad_pipeline(
|
|
| 237 |
profiling.tier2_segments = match_profiling.get("tier2_segments", [])
|
| 238 |
profiling.consec_reanchors = match_profiling.get("consec_reanchors", 0)
|
| 239 |
profiling.special_merges = match_profiling.get("special_merges", 0)
|
|
|
|
| 240 |
profiling.segments_attempted = match_profiling.get("segments_attempted", 0)
|
| 241 |
profiling.segments_passed = match_profiling.get("segments_passed", 0)
|
| 242 |
|
|
@@ -268,9 +273,19 @@ def _run_post_vad_pipeline(
|
|
| 268 |
_underseg_by_words: list[int] = []
|
| 269 |
_underseg_by_ayah: list[int] = []
|
| 270 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
for idx, (seg, (matched_text, score, matched_ref)) in enumerate(
|
| 272 |
zip(vad_segments, match_results)
|
| 273 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
if idx == last_display_idx and matched_ref:
|
| 275 |
if not is_end_of_verse(matched_ref):
|
| 276 |
score = max(0.0, score - 0.25)
|
|
@@ -283,13 +298,15 @@ def _run_post_vad_pipeline(
|
|
| 283 |
matched_ref = ""
|
| 284 |
error = f"Low confidence ({score:.0%})"
|
| 285 |
|
| 286 |
-
|
|
|
|
|
|
|
| 287 |
word_count, ayah_span = get_segment_word_stats(matched_ref)
|
| 288 |
underseg = check_undersegmented(matched_ref, duration)
|
| 289 |
|
| 290 |
segments.append(SegmentInfo(
|
| 291 |
start_time=seg.start_time,
|
| 292 |
-
end_time=
|
| 293 |
transcribed_text=phoneme_text,
|
| 294 |
matched_text=matched_text,
|
| 295 |
matched_ref=matched_ref,
|
|
|
|
| 174 |
|
| 175 |
# If segments were split (combined Isti'adha+Basmala), pad phoneme_texts
|
| 176 |
# with empty placeholders so indices stay aligned.
|
| 177 |
+
# The split replaces one segment with two, so vad_segments is 1 longer.
|
| 178 |
+
# Insert an empty placeholder at the split position (= first_quran_idx - 2
|
| 179 |
+
# is where the combined segment was, but simpler: find the gap).
|
| 180 |
if len(vad_segments) != len(phoneme_texts):
|
| 181 |
+
split_idx = first_quran_idx - 2 # Combined was split into 2 entries starting here
|
| 182 |
+
phoneme_texts = phoneme_texts[:split_idx] + [[], []] + phoneme_texts[split_idx + 1:]
|
| 183 |
|
| 184 |
# Anchor detection via phoneme n-gram voting
|
| 185 |
progress(*progress_steps["anchor"])
|
|
|
|
| 210 |
|
| 211 |
# Phoneme-based DP alignment
|
| 212 |
match_start = time.time()
|
| 213 |
+
match_results, match_profiling, gap_segments, merged_into = run_phoneme_matching(
|
| 214 |
phoneme_texts,
|
| 215 |
surah,
|
| 216 |
first_quran_idx,
|
|
|
|
| 241 |
profiling.tier2_segments = match_profiling.get("tier2_segments", [])
|
| 242 |
profiling.consec_reanchors = match_profiling.get("consec_reanchors", 0)
|
| 243 |
profiling.special_merges = match_profiling.get("special_merges", 0)
|
| 244 |
+
profiling.transition_skips = match_profiling.get("transition_skips", 0)
|
| 245 |
profiling.segments_attempted = match_profiling.get("segments_attempted", 0)
|
| 246 |
profiling.segments_passed = match_profiling.get("segments_passed", 0)
|
| 247 |
|
|
|
|
| 273 |
_underseg_by_words: list[int] = []
|
| 274 |
_underseg_by_ayah: list[int] = []
|
| 275 |
|
| 276 |
+
# Pre-compute merged end times: extend target segment's end_time
|
| 277 |
+
_merged_end_times = {} # {target_idx: extended_end_time}
|
| 278 |
+
for consumed_idx, target_idx in merged_into.items():
|
| 279 |
+
if consumed_idx < len(vad_segments):
|
| 280 |
+
_merged_end_times[target_idx] = vad_segments[consumed_idx].end_time
|
| 281 |
+
|
| 282 |
for idx, (seg, (matched_text, score, matched_ref)) in enumerate(
|
| 283 |
zip(vad_segments, match_results)
|
| 284 |
):
|
| 285 |
+
# Skip segments consumed by Tahmeed merge
|
| 286 |
+
if idx in merged_into:
|
| 287 |
+
continue
|
| 288 |
+
|
| 289 |
if idx == last_display_idx and matched_ref:
|
| 290 |
if not is_end_of_verse(matched_ref):
|
| 291 |
score = max(0.0, score - 0.25)
|
|
|
|
| 298 |
matched_ref = ""
|
| 299 |
error = f"Low confidence ({score:.0%})"
|
| 300 |
|
| 301 |
+
# Extend end_time if this segment absorbed a merged segment
|
| 302 |
+
seg_end_time = _merged_end_times.get(idx, seg.end_time)
|
| 303 |
+
duration = seg_end_time - seg.start_time
|
| 304 |
word_count, ayah_span = get_segment_word_stats(matched_ref)
|
| 305 |
underseg = check_undersegmented(matched_ref, duration)
|
| 306 |
|
| 307 |
segments.append(SegmentInfo(
|
| 308 |
start_time=seg.start_time,
|
| 309 |
+
end_time=seg_end_time,
|
| 310 |
transcribed_text=phoneme_text,
|
| 311 |
matched_text=matched_text,
|
| 312 |
matched_ref=matched_ref,
|
src/ui/segments.py
CHANGED
|
@@ -16,6 +16,7 @@ from config import (
|
|
| 16 |
SURAH_INFO_PATH,
|
| 17 |
)
|
| 18 |
from src.core.segment_types import SegmentInfo
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
def format_timestamp(seconds: float) -> str:
|
|
@@ -237,11 +238,14 @@ def render_segment_card(seg: SegmentInfo, idx: int, audio_int16: np.ndarray = No
|
|
| 237 |
render_key: Unique key to prevent browser caching between renders
|
| 238 |
segment_dir: Directory to write segment WAV files into
|
| 239 |
"""
|
|
|
|
| 240 |
confidence_class = get_confidence_class(seg.match_score)
|
| 241 |
confidence_badge_class = confidence_class # preserve original for badge color
|
| 242 |
-
if
|
|
|
|
|
|
|
| 243 |
confidence_class = "segment-low"
|
| 244 |
-
|
| 245 |
confidence_class = "segment-underseg"
|
| 246 |
|
| 247 |
timestamp = f"{format_timestamp(seg.start_time)} - {format_timestamp(seg.end_time)}"
|
|
@@ -330,7 +334,12 @@ def render_segment_card(seg: SegmentInfo, idx: int, audio_int16: np.ndarray = No
|
|
| 330 |
else:
|
| 331 |
text_html = ""
|
| 332 |
|
| 333 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
|
| 335 |
# Build inline header: Segment N | ref | duration | time range
|
| 336 |
header_parts = [f"Segment {idx + 1}"]
|
|
@@ -389,9 +398,11 @@ def render_segments(segments: list, audio_int16: np.ndarray = None, sample_rate:
|
|
| 389 |
wf.writeframes(audio_int16.tobytes())
|
| 390 |
full_audio_url = f"/gradio_api/file={full_path}"
|
| 391 |
|
| 392 |
-
# Categorize segments by confidence level (1-indexed for display)
|
| 393 |
-
med_segments = [i + 1 for i, s in enumerate(segments)
|
| 394 |
-
|
|
|
|
|
|
|
| 395 |
|
| 396 |
# Build header with confidence summary
|
| 397 |
header_parts = []
|
|
|
|
| 16 |
SURAH_INFO_PATH,
|
| 17 |
)
|
| 18 |
from src.core.segment_types import SegmentInfo
|
| 19 |
+
from src.alignment.special_segments import ALL_SPECIAL_REFS
|
| 20 |
|
| 21 |
|
| 22 |
def format_timestamp(seconds: float) -> str:
|
|
|
|
| 238 |
render_key: Unique key to prevent browser caching between renders
|
| 239 |
segment_dir: Directory to write segment WAV files into
|
| 240 |
"""
|
| 241 |
+
is_special = seg.matched_ref in ALL_SPECIAL_REFS
|
| 242 |
confidence_class = get_confidence_class(seg.match_score)
|
| 243 |
confidence_badge_class = confidence_class # preserve original for badge color
|
| 244 |
+
if is_special:
|
| 245 |
+
confidence_class = "segment-special"
|
| 246 |
+
elif seg.has_missing_words:
|
| 247 |
confidence_class = "segment-low"
|
| 248 |
+
elif seg.potentially_undersegmented and confidence_class != "segment-low":
|
| 249 |
confidence_class = "segment-underseg"
|
| 250 |
|
| 251 |
timestamp = f"{format_timestamp(seg.start_time)} - {format_timestamp(seg.end_time)}"
|
|
|
|
| 334 |
else:
|
| 335 |
text_html = ""
|
| 336 |
|
| 337 |
+
if is_special:
|
| 338 |
+
confidence_badge = f'<div class="segment-badge segment-special-badge">{seg.matched_ref}</div>'
|
| 339 |
+
elif seg.has_missing_words:
|
| 340 |
+
confidence_badge = ""
|
| 341 |
+
else:
|
| 342 |
+
confidence_badge = f'<div class="segment-badge {confidence_badge_class}-badge">{confidence_pct}</div>'
|
| 343 |
|
| 344 |
# Build inline header: Segment N | ref | duration | time range
|
| 345 |
header_parts = [f"Segment {idx + 1}"]
|
|
|
|
| 398 |
wf.writeframes(audio_int16.tobytes())
|
| 399 |
full_audio_url = f"/gradio_api/file={full_path}"
|
| 400 |
|
| 401 |
+
# Categorize segments by confidence level (1-indexed for display), excluding specials
|
| 402 |
+
med_segments = [i + 1 for i, s in enumerate(segments)
|
| 403 |
+
if CONFIDENCE_MED <= s.match_score < CONFIDENCE_HIGH and s.matched_ref not in ALL_SPECIAL_REFS]
|
| 404 |
+
low_segments = [i + 1 for i, s in enumerate(segments)
|
| 405 |
+
if s.match_score < CONFIDENCE_MED and s.matched_ref not in ALL_SPECIAL_REFS]
|
| 406 |
|
| 407 |
# Build header with confidence summary
|
| 408 |
header_parts = []
|
src/ui/styles.py
CHANGED
|
@@ -400,6 +400,8 @@ def build_css() -> str:
|
|
| 400 |
.segment-low-badge {{ background: #dc3545; }}
|
| 401 |
.segment-underseg {{ background: #ffe5cc; border-color: #ff8c00; }}
|
| 402 |
.segment-underseg-badge {{ background: #ff8c00; }}
|
|
|
|
|
|
|
| 403 |
|
| 404 |
/* Review summary text colors */
|
| 405 |
.segments-review-summary {{ margin-bottom: 8px; font-size: 14px; }}
|
|
@@ -421,11 +423,13 @@ def build_css() -> str:
|
|
| 421 |
.segment-med {{ background: rgba(255, 193, 7, 0.2); border-color: #ffc107; }}
|
| 422 |
.segment-low {{ background: rgba(220, 53, 69, 0.2); border-color: #dc3545; }}
|
| 423 |
.segment-underseg {{ background: rgba(255, 140, 0, 0.2); border-color: #ff8c00; }}
|
|
|
|
| 424 |
}}
|
| 425 |
/* Also support Gradio's dark class */
|
| 426 |
.dark .segment-high {{ background: rgba(40, 167, 69, 0.2); border-color: #28a745; }}
|
| 427 |
.dark .segment-med {{ background: rgba(255, 193, 7, 0.2); border-color: #ffc107; }}
|
| 428 |
.dark .segment-low {{ background: rgba(220, 53, 69, 0.2); border-color: #dc3545; }}
|
| 429 |
.dark .segment-underseg {{ background: rgba(255, 140, 0, 0.2); border-color: #ff8c00; }}
|
|
|
|
| 430 |
|
| 431 |
"""
|
|
|
|
| 400 |
.segment-low-badge {{ background: #dc3545; }}
|
| 401 |
.segment-underseg {{ background: #ffe5cc; border-color: #ff8c00; }}
|
| 402 |
.segment-underseg-badge {{ background: #ff8c00; }}
|
| 403 |
+
.segment-special {{ background: #e8eaf6; border-color: #5c6bc0; border-style: dashed; }}
|
| 404 |
+
.segment-special-badge {{ background: #5c6bc0; }}
|
| 405 |
|
| 406 |
/* Review summary text colors */
|
| 407 |
.segments-review-summary {{ margin-bottom: 8px; font-size: 14px; }}
|
|
|
|
| 423 |
.segment-med {{ background: rgba(255, 193, 7, 0.2); border-color: #ffc107; }}
|
| 424 |
.segment-low {{ background: rgba(220, 53, 69, 0.2); border-color: #dc3545; }}
|
| 425 |
.segment-underseg {{ background: rgba(255, 140, 0, 0.2); border-color: #ff8c00; }}
|
| 426 |
+
.segment-special {{ background: rgba(92, 107, 192, 0.2); border-color: #5c6bc0; border-style: dashed; }}
|
| 427 |
}}
|
| 428 |
/* Also support Gradio's dark class */
|
| 429 |
.dark .segment-high {{ background: rgba(40, 167, 69, 0.2); border-color: #28a745; }}
|
| 430 |
.dark .segment-med {{ background: rgba(255, 193, 7, 0.2); border-color: #ffc107; }}
|
| 431 |
.dark .segment-low {{ background: rgba(220, 53, 69, 0.2); border-color: #dc3545; }}
|
| 432 |
.dark .segment-underseg {{ background: rgba(255, 140, 0, 0.2); border-color: #ff8c00; }}
|
| 433 |
+
.dark .segment-special {{ background: rgba(92, 107, 192, 0.2); border-color: #5c6bc0; border-style: dashed; }}
|
| 434 |
|
| 435 |
"""
|