# NOTE(review): the three lines below were page residue from the source this file
# was extracted from ("Spaces / Running on Zero" — Hugging Face Spaces UI chrome),
# preserved here as a comment so the module parses.
# Spaces: Running on Zero / Running on Zero
| """Orchestration for phoneme-based alignment and retries.""" | |
| from typing import List, Tuple | |
| from config import ( | |
| ANCHOR_SEGMENTS, | |
| MAX_CONSECUTIVE_FAILURES, | |
| RETRY_LOOKBACK_WORDS, | |
| RETRY_LOOKAHEAD_WORDS, | |
| MAX_EDIT_DISTANCE_RELAXED, | |
| PHONEME_ALIGNMENT_PROFILING, | |
| ) | |
| from src.core.debug_collector import get_debug_collector | |
| def _debug_alignment_result(alignment, chapter_ref): | |
| """Extract JSON-safe dict from an AlignmentResult for the debug collector.""" | |
| if alignment is None: | |
| return None | |
| return { | |
| "matched_ref": alignment.matched_ref, | |
| "start_word_idx": alignment.start_word_idx, | |
| "end_word_idx": alignment.end_word_idx, | |
| "edit_cost": round(alignment.edit_cost, 4), | |
| "confidence": round(alignment.confidence, 4), | |
| "j_start": alignment.j_start, | |
| "best_j": alignment.best_j, | |
| "basmala_consumed": alignment.basmala_consumed, | |
| "n_wraps": alignment.n_wraps, | |
| "wrap_points": alignment.wrap_points, | |
| "wrap_word_ranges": alignment.wrap_word_ranges, | |
| } | |
def run_phoneme_matching(
    phoneme_texts: List[List[str]],
    detected_surah: int,
    first_quran_idx: int = 0,
    special_results: List[tuple] = None,
    start_pointer: int = 0,
) -> Tuple[List[tuple], dict, set, dict, set]:
    """
    Phoneme-based segment matching using substring DP.

    Walks the ASR phoneme segments in order, aligning each against a cached
    chapter reference while advancing a word pointer.  Along the way it
    handles chapter transitions, non-Quranic transition phrases
    (Tahmeed/Amin/etc.), Basmala fused with the first verse, a two-tier
    graduated retry, and global re-anchoring after repeated failures.

    Args:
        phoneme_texts: List of phoneme lists (each is a list of phoneme strings)
        detected_surah: Surah number from anchor search
        first_quran_idx: Index where Quran segments start (after specials)
        special_results: Results for special segments (Isti'adha/Basmala)
        start_pointer: Initial word pointer from anchor voting

    Returns:
        (results, profiling_dict, gap_segments, merged_into, repetition_segments)
        results: List[(matched_text, score, matched_ref, wrap_word_ranges_or_None), ...]
        merged_into: dict mapping consumed segment indices to their target segment index
        repetition_segments: set of segment indices where wraps were detected
    """
    from .phoneme_matcher import align_segment, get_matched_text
    from .phoneme_matcher_cache import get_chapter_reference
    from .phoneme_anchor import verse_to_word_index, find_anchor_by_voting
    from .ngram_index import get_ngram_index

    # Only import time if profiling enabled
    if PHONEME_ALIGNMENT_PROFILING:
        import time
        total_start = time.perf_counter()
        ref_build_start = time.perf_counter()

    # Build/get cached chapter reference (includes phonemizer call if not cached)
    chapter_ref = get_chapter_reference(detected_surah)
    if PHONEME_ALIGNMENT_PROFILING:
        ref_build_time = time.perf_counter() - ref_build_start

    # Initialize results with special segments
    results = list(special_results) if special_results else []
    # Parallel list: None for specials/failures, (start_word_idx, end_word_idx) for matches
    word_indices = [None] * len(results)

    # Timing accumulators (only used if profiling enabled)
    if PHONEME_ALIGNMENT_PROFILING:
        dp_times = []
        window_setup_total = 0.0
        result_build_total = 0.0

    # Track whether the next segment might have Basmala fused with verse content
    from .special_segments import (
        SPECIAL_PHONEMES, SPECIAL_TEXT, TRANSITION_TEXT,
        detect_transition_segment, detect_inter_chapter_specials,
    )
    basmala_already_detected = any(
        r[2] in ("Basmala", "Isti'adha+Basmala") for r in (special_results or [])
    )
    is_first_after_transition = not basmala_already_detected
    special_merges = 0

    # Transition segment state
    transition_mode = False  # True after a transition phrase: keep probing for more before re-anchoring
    transition_skips = 0
    tahmeed_merge_skip = 0   # how many upcoming segments were merged into a Tahmeed segment
    merged_into = {}  # {consumed_idx: target_idx}

    # Gap and repetition tracking
    gap_segments = set()
    repetition_segments = set()
    transition_expected_pointer = -1  # -1 = no pending check

    def _check_transition_gap(start_word_idx):
        """Flag missing words at start of new chapter after transition."""
        nonlocal transition_expected_pointer
        if transition_expected_pointer < 0:
            return
        if start_word_idx > transition_expected_pointer:
            # The segment just appended is the one that landed past the expected word.
            seg_idx = len(word_indices) - 1
            gap_segments.add(seg_idx)
            gap = start_word_idx - transition_expected_pointer
            print(f" [GAP] {gap} word(s) missing at start of chapter after transition "
                  f"(expected word {transition_expected_pointer}, got {start_word_idx})")
        # Pending check is consumed whether or not a gap was found.
        transition_expected_pointer = -1

    # Process Quran segments with phoneme alignment
    pointer = start_pointer
    num_segments = 0
    consecutive_failures = 0
    skip_count = 0
    pending_specials = []
    tier1_attempts = 0
    tier1_passed = 0
    tier1_segments = []
    tier2_attempts = 0
    tier2_passed = 0
    tier2_segments = []
    consec_reanchors = 0
    segments_attempted = 0
    segments_passed = 0

    for i, asr_phonemes in enumerate(phoneme_texts[first_quran_idx:]):
        # Handle segments consumed by inter-chapter special detection
        if skip_count > 0:
            results.append(pending_specials.pop(0))
            word_indices.append(None)
            skip_count -= 1
            continue

        # Handle segments consumed by Tahmeed merge (sami'a + rabbana in separate segments)
        if tahmeed_merge_skip > 0:
            # This segment's audio was merged into the previous Tahmeed segment
            results.append(("", 0.0, "", None))
            word_indices.append(None)
            tahmeed_merge_skip -= 1
            transition_skips += 1
            continue

        segment_idx = first_quran_idx + i + 1  # 1-indexed for display
        segments_attempted += 1

        # Transition mode: keep checking for transitions before trying alignment
        if transition_mode:
            trans_name, trans_conf = detect_transition_segment(asr_phonemes)
            if trans_name:
                print(f" [TRANSITION-MODE] Segment {segment_idx}: {trans_name} (conf={trans_conf:.2f})")
                _dc = get_debug_collector()
                if _dc is not None:
                    _dc.add_event("transition_detected", segment_idx=segment_idx,
                                  transition_type=trans_name, confidence=round(trans_conf, 4),
                                  context="transition_mode")
                results.append((TRANSITION_TEXT[trans_name], trans_conf, trans_name, None))
                word_indices.append(None)
                transition_skips += 1
                # Tahmeed peek-ahead for merge
                if trans_name == "Tahmeed":
                    next_abs = first_quran_idx + i + 1
                    if next_abs < len(phoneme_texts) and phoneme_texts[next_abs]:
                        resp_name, resp_conf = detect_transition_segment(
                            phoneme_texts[next_abs], allowed={"Tahmeed"})
                        if resp_name:
                            merged_into[next_abs] = first_quran_idx + i
                            tahmeed_merge_skip = 1
                            print(f" [TAHMEED-MERGE] Next segment merged into Tahmeed")
                            if _dc is not None:
                                _dc.add_event("tahmeed_merge", segment_idx=segment_idx,
                                              merged_segment=next_abs)
                continue
            else:
                # Exit transition mode, global reanchor
                transition_mode = False
                print(f" [TRANSITION-MODE] Exiting at segment {segment_idx}, running global reanchor...")
                remaining_idx = first_quran_idx + i
                remaining_texts = phoneme_texts[remaining_idx:]
                if remaining_texts:
                    reanchor_surah, reanchor_ayah = find_anchor_by_voting(
                        remaining_texts, get_ngram_index(), ANCHOR_SEGMENTS,
                    )
                    if reanchor_surah > 0:
                        if reanchor_surah != detected_surah:
                            detected_surah = reanchor_surah
                            chapter_ref = get_chapter_reference(detected_surah)
                        pointer = verse_to_word_index(chapter_ref, reanchor_ayah)
                        # Arm the start-of-chapter gap check for the next successful match.
                        transition_expected_pointer = pointer
                        print(f" [GLOBAL-REANCHOR] Jumped to Surah {detected_surah}, "
                              f"Ayah {reanchor_ayah}, word {pointer}")
                        _dc = get_debug_collector()
                        if _dc is not None:
                            _dc.add_event("reanchor", at_segment=segment_idx,
                                          reason="transition_mode_exit",
                                          new_surah=detected_surah,
                                          new_ayah=reanchor_ayah, new_pointer=pointer)
                        consecutive_failures = 0
                # Fall through to normal alignment below

        # Primary alignment attempt at the current pointer.
        alignment, timing = align_segment(asr_phonemes, chapter_ref, pointer, segment_idx)
        num_segments += 1

        # Accumulate timing if profiling enabled
        if PHONEME_ALIGNMENT_PROFILING:
            dp_times.append(timing['dp_time'])
            window_setup_total += timing['window_setup_time']
            result_build_total += timing['result_build_time']

        # Debug collector: primary alignment attempt
        _dc = get_debug_collector()
        if _dc is not None:
            _dc.add_alignment_result(
                segment_idx, asr_phonemes,
                window={"pointer": pointer, "surah": detected_surah},
                expected_pointer=pointer,
                result=_debug_alignment_result(alignment, chapter_ref),
                timing=timing,
            )

        # Chapter transition: pointer past end of chapter
        if alignment is None and pointer >= chapter_ref.num_words:
            remaining_phonemes = phoneme_texts[first_quran_idx + i:]
            amin_consumed = 0
            if chapter_ref.surah == 1:
                # Check for Amin after Al-Fatiha before inter-chapter specials
                amin_name, amin_conf = detect_transition_segment(
                    asr_phonemes, allowed={"Amin"})
                if amin_name:
                    print(f" [AMIN] Detected after Surah 1 (conf={amin_conf:.2f})")
                    results.append((TRANSITION_TEXT["Amin"], amin_conf, "Amin", None))
                    word_indices.append(None)
                    transition_skips += 1
                    amin_consumed = 1
                    # Re-slice remaining phonemes to start after Amin
                    remaining_phonemes = phoneme_texts[first_quran_idx + i + 1:]

            inter_specials, num_consumed = detect_inter_chapter_specials(remaining_phonemes)

            if chapter_ref.surah == 1:
                # After Al-Fatiha, the next chapter could be anything — global reanchor
                print(f" [CHAPTER-END] Surah 1 complete at segment {segment_idx}, "
                      f"running global reanchor...")
                _dc = get_debug_collector()
                if _dc is not None:
                    _dc.add_event("chapter_end", at_segment=segment_idx,
                                  from_surah=1, next_action="global_reanchor")
                # Use segments after Amin + specials for anchor voting
                anchor_offset = first_quran_idx + i + amin_consumed + num_consumed
                anchor_remaining = phoneme_texts[anchor_offset:]
                reanchor_surah, reanchor_ayah = find_anchor_by_voting(
                    anchor_remaining, get_ngram_index(), ANCHOR_SEGMENTS,
                )
                if reanchor_surah > 0:
                    next_surah = reanchor_surah
                    chapter_ref = get_chapter_reference(next_surah)
                    pointer = verse_to_word_index(chapter_ref, reanchor_ayah)
                    # Don't set transition_expected_pointer — after Surah 1 the next
                    # chapter is arbitrary (global reanchor), so gaps are expected.
                    print(f" [GLOBAL-REANCHOR] Anchored to Surah {next_surah}, "
                          f"Ayah {reanchor_ayah}, word {pointer}")
                else:
                    # Fallback: assume chapter 2
                    next_surah = 2
                    chapter_ref = get_chapter_reference(next_surah)
                    pointer = 0
                    print(f" [GLOBAL-REANCHOR] No anchor found, falling back to Surah 2")
            else:
                next_surah = chapter_ref.surah + 1
                if next_surah > 114:
                    pass  # No more chapters — fall through to failure handling
                else:
                    # Check for transition before committing to next sequential surah
                    if num_consumed == 0:
                        trans_name, trans_conf = detect_transition_segment(asr_phonemes)
                        if trans_name:
                            print(f" [CHAPTER-END-TRANSITION] Segment {segment_idx}: {trans_name} "
                                  f"at end of Surah {chapter_ref.surah} (conf={trans_conf:.2f})")
                            results.append((TRANSITION_TEXT[trans_name], trans_conf, trans_name, None))
                            word_indices.append(None)
                            transition_skips += 1
                            transition_mode = True
                            detected_surah = next_surah
                            chapter_ref = get_chapter_reference(next_surah)
                            pointer = 0
                            transition_expected_pointer = 0
                            consecutive_failures = 0
                            continue
                    print(f" [CHAPTER-END] Surah {chapter_ref.surah} complete at segment {segment_idx}, "
                          f"transitioning to Surah {next_surah}")
                    _dc = get_debug_collector()
                    if _dc is not None:
                        _dc.add_event("chapter_transition", at_segment=segment_idx,
                                      from_surah=chapter_ref.surah, to_surah=next_surah)
                    chapter_ref = get_chapter_reference(next_surah)
                    pointer = 0
                    transition_expected_pointer = 0

            if next_surah <= 114:
                detected_surah = next_surah
                consecutive_failures = 0
                if amin_consumed > 0:
                    # Current segment was Amin (already appended above).
                    # Queue inter-chapter specials for subsequent segments.
                    has_basmala = any(s[2] in ("Basmala", "Isti'adha+Basmala") for s in inter_specials)
                    is_first_after_transition = not has_basmala
                    if num_consumed > 0:
                        pending_specials = list(inter_specials)
                        skip_count = num_consumed
                    else:
                        is_first_after_transition = True
                    continue
                if num_consumed > 0:
                    has_basmala = any(s[2] in ("Basmala", "Isti'adha+Basmala") for s in inter_specials)
                    is_first_after_transition = not has_basmala
                    # Current segment is a special — append its result
                    results.append(inter_specials[0])
                    word_indices.append(None)
                    # Queue remaining specials for subsequent segments
                    if num_consumed > 1:
                        pending_specials = list(inter_specials[1:])
                        skip_count = num_consumed - 1
                    continue
                else:
                    is_first_after_transition = True
                    # No specials — re-try alignment on this segment against the new chapter
                    alignment, timing = align_segment(asr_phonemes, chapter_ref, pointer, segment_idx)
                    num_segments += 1
                    if PHONEME_ALIGNMENT_PROFILING:
                        dp_times.append(timing['dp_time'])
                        window_setup_total += timing['window_setup_time']
                        result_build_total += timing['result_build_time']
                    # Fall through to existing if/else below

        # Basmala-fused retry: if this is the first segment after a transition
        # and Basmala wasn't detected, the reciter may have merged Basmala with
        # the first verse. Always try prepending Basmala phonemes to R and pick
        # the better result (even if the plain alignment already succeeded).
        if is_first_after_transition:
            is_first_after_transition = False
            basmala_alignment, basmala_timing = align_segment(
                asr_phonemes, chapter_ref, pointer, segment_idx,
                basmala_prefix=True)
            num_segments += 1
            if PHONEME_ALIGNMENT_PROFILING:
                dp_times.append(basmala_timing['dp_time'])
                window_setup_total += basmala_timing['window_setup_time']
                result_build_total += basmala_timing['result_build_time']
            if basmala_alignment and basmala_alignment.basmala_consumed:
                existing_conf = alignment.confidence if alignment else 0.0
                if basmala_alignment.confidence > existing_conf:
                    matched_text = SPECIAL_TEXT["Basmala"] + " " + get_matched_text(chapter_ref, basmala_alignment)
                    result = (matched_text, basmala_alignment.confidence, basmala_alignment.matched_ref,
                              basmala_alignment.wrap_word_ranges)
                    pointer = basmala_alignment.end_word_idx + 1
                    consecutive_failures = 0
                    word_indices.append((basmala_alignment.start_word_idx, basmala_alignment.end_word_idx))
                    _check_transition_gap(basmala_alignment.start_word_idx)
                    if basmala_alignment.n_wraps > 0:
                        repetition_segments.add(len(results))
                    results.append(result)
                    special_merges += 1
                    segments_passed += 1
                    print(f" [BASMALA-FUSED] Segment {segment_idx}: Basmala merged with verse "
                          f"(fused conf={basmala_alignment.confidence:.2f} > plain conf={existing_conf:.2f})")
                    _dc = get_debug_collector()
                    if _dc is not None:
                        _dc.add_event("basmala_fused", segment_idx=segment_idx,
                                      fused_conf=round(basmala_alignment.confidence, 4),
                                      plain_conf=round(existing_conf, 4), chose="fused")
                    continue
                # Basmala-fused didn't win — fall through with original alignment

        if alignment:
            # Primary alignment succeeded — commit and advance.
            is_first_after_transition = False
            matched_text = get_matched_text(chapter_ref, alignment)
            result = (matched_text, alignment.confidence, alignment.matched_ref,
                      alignment.wrap_word_ranges)
            pointer = alignment.end_word_idx + 1  # Advance pointer
            consecutive_failures = 0
            word_indices.append((alignment.start_word_idx, alignment.end_word_idx))
            _check_transition_gap(alignment.start_word_idx)
            if alignment.n_wraps > 0:
                repetition_segments.add(len(results))
            segments_passed += 1
        else:
            # === Check for transition segment before retry tiers ===
            trans_name, trans_conf = detect_transition_segment(asr_phonemes)
            if trans_name:
                print(f" [TRANSITION] Segment {segment_idx}: {trans_name} (conf={trans_conf:.2f})")
                _dc = get_debug_collector()
                if _dc is not None:
                    _dc.add_event("transition_detected", segment_idx=segment_idx,
                                  transition_type=trans_name, confidence=round(trans_conf, 4),
                                  context="pre_retry")
                result = (TRANSITION_TEXT[trans_name], trans_conf, trans_name, None)
                word_indices.append(None)
                transition_skips += 1
                transition_mode = True
                # Tahmeed peek-ahead for merge
                if trans_name == "Tahmeed":
                    next_abs = first_quran_idx + i + 1
                    if next_abs < len(phoneme_texts) and phoneme_texts[next_abs]:
                        resp_name, resp_conf = detect_transition_segment(
                            phoneme_texts[next_abs], allowed={"Tahmeed"})
                        if resp_name:
                            merged_into[next_abs] = first_quran_idx + i
                            tahmeed_merge_skip = 1
                            print(f" [TAHMEED-MERGE] Next segment merged into Tahmeed")
                            if _dc is not None:
                                _dc.add_event("tahmeed_merge", segment_idx=segment_idx,
                                              merged_segment=next_abs)
                results.append(result)
                continue

            # === Graduated retry ===
            # Tier 1: expanded window, same threshold
            tier1_attempts += 1
            tier1_segments.append(segment_idx)
            alignment, timing = align_segment(
                asr_phonemes, chapter_ref, pointer, segment_idx,
                lookback_override=RETRY_LOOKBACK_WORDS,
                lookahead_override=RETRY_LOOKAHEAD_WORDS,
            )
            num_segments += 1
            if PHONEME_ALIGNMENT_PROFILING:
                dp_times.append(timing['dp_time'])
                window_setup_total += timing['window_setup_time']
                result_build_total += timing['result_build_time']

            # Tier 2: expanded window + relaxed threshold
            tier2_entered = False
            if alignment is None:
                tier2_entered = True
                tier2_attempts += 1
                tier2_segments.append(segment_idx)
                alignment, timing = align_segment(
                    asr_phonemes, chapter_ref, pointer, segment_idx,
                    lookback_override=RETRY_LOOKBACK_WORDS,
                    lookahead_override=RETRY_LOOKAHEAD_WORDS,
                    max_edit_distance_override=MAX_EDIT_DISTANCE_RELAXED,
                )
                num_segments += 1
                if PHONEME_ALIGNMENT_PROFILING:
                    dp_times.append(timing['dp_time'])
                    window_setup_total += timing['window_setup_time']
                    result_build_total += timing['result_build_time']

            if alignment:
                # Retry succeeded
                is_first_after_transition = False
                matched_text = get_matched_text(chapter_ref, alignment)
                result = (matched_text, alignment.confidence, alignment.matched_ref,
                          alignment.wrap_word_ranges)
                pointer = alignment.end_word_idx + 1
                consecutive_failures = 0
                word_indices.append((alignment.start_word_idx, alignment.end_word_idx))
                _check_transition_gap(alignment.start_word_idx)
                if alignment.n_wraps > 0:
                    repetition_segments.add(len(results))
                segments_passed += 1
                tier_name = "tier2" if tier2_entered else "tier1"
                if tier2_entered:
                    tier2_passed += 1
                else:
                    tier1_passed += 1
                print(f" [RETRY-OK] Segment {segment_idx}: recovered via expanded window/relaxed threshold")
                _dc = get_debug_collector()
                if _dc is not None:
                    # pointer was already advanced above, so the pre-retry
                    # expected pointer is pointer - 1 here.
                    _dc.add_alignment_result(
                        segment_idx, asr_phonemes,
                        window={"pointer": pointer - 1, "surah": detected_surah},
                        expected_pointer=pointer - 1,
                        result=_debug_alignment_result(alignment, chapter_ref),
                        timing=timing, retry_tier=tier_name,
                    )
                    _dc.add_event(f"retry_{tier_name}", segment_idx=segment_idx,
                                  passed=True, confidence=round(alignment.confidence, 4))
            else:
                # Real failure after all retries
                result = ("", 0.0, "", None)
                consecutive_failures += 1
                word_indices.append(None)
                _dc = get_debug_collector()
                if _dc is not None:
                    _dc.add_event("retry_failed", segment_idx=segment_idx,
                                  tier1=True, tier2=tier2_entered)
                if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
                    consec_reanchors += 1
                    # Global re-anchor (not constrained to current surah)
                    remaining_idx = first_quran_idx + i + 1
                    remaining_texts = phoneme_texts[remaining_idx:]
                    if remaining_texts:
                        reanchor_surah, reanchor_ayah = find_anchor_by_voting(
                            remaining_texts, get_ngram_index(), ANCHOR_SEGMENTS,
                        )
                        if reanchor_surah > 0:
                            if reanchor_surah != detected_surah:
                                detected_surah = reanchor_surah
                                chapter_ref = get_chapter_reference(detected_surah)
                            pointer = verse_to_word_index(chapter_ref, reanchor_ayah)
                            transition_expected_pointer = pointer
                            print(f" [GLOBAL-REANCHOR] Jumped to Surah {detected_surah}, "
                                  f"Ayah {reanchor_ayah}, word {pointer}")
                            _dc = get_debug_collector()
                            if _dc is not None:
                                _dc.add_event("reanchor", at_segment=segment_idx,
                                              reason="consecutive_failures",
                                              new_surah=detected_surah,
                                              new_ayah=reanchor_ayah, new_pointer=pointer)
                            consecutive_failures = 0

        # Shared append for the plain-success, retry-success, and
        # retry-failure paths (the branches above that appended their own
        # result used `continue` to skip this line).
        results.append(result)

    # Post-processing: detect consecutive segments with reference gaps
    # (gap_segments may already have entries from chapter-transition checks above)
    prev_matched_idx = None
    for idx in range(len(results)):
        if word_indices[idx] is None:
            continue
        if prev_matched_idx is not None:
            # Skip gap check across chapter transitions — word indices are per-chapter
            prev_ref = results[prev_matched_idx][2]
            curr_ref = results[idx][2]
            prev_surah = prev_ref.split(":")[0] if prev_ref and ":" in prev_ref else None
            curr_surah = curr_ref.split(":")[0] if curr_ref and ":" in curr_ref else None
            if prev_surah is not None and prev_surah == curr_surah:
                prev_end = word_indices[prev_matched_idx][1]
                curr_start = word_indices[idx][0]
                gap = curr_start - prev_end - 1
                if gap > 0:
                    gap_segments.add(prev_matched_idx)
                    gap_segments.add(idx)
                    print(f" [GAP] {gap} word(s) missing between segments "
                          f"{prev_matched_idx + 1} and {idx + 1}")
                    _dc = get_debug_collector()
                    if _dc is not None:
                        _dc.add_event("gap", position="between",
                                      segment_before=prev_matched_idx + 1,
                                      segment_after=idx + 1, missing_words=gap)
        prev_matched_idx = idx

    # Edge case: missing words at start of expected range
    first_matched = next((i for i, w in enumerate(word_indices) if w is not None), None)
    if first_matched is not None:
        first_start = word_indices[first_matched][0]
        if first_start > start_pointer:
            gap_segments.add(first_matched)
            gap_count = first_start - start_pointer
            print(f" [GAP] {gap_count} word(s) missing before first segment {first_matched + 1}")
            _dc = get_debug_collector()
            if _dc is not None:
                _dc.add_event("gap", position="before_first",
                              segment_idx=first_matched + 1, missing_words=gap_count)

    # Edge case: missing words at end of current verse
    # Only flag if the last matched segment is also the final segment overall.
    # If there are trailing no-match segments after it, those account for the
    # remaining audio — the words aren't missing, they just failed to align.
    # Compare against the verse boundary (not chapter end), since a recitation
    # doesn't necessarily cover the entire chapter.
    last_matched = next((i for i in range(len(word_indices) - 1, -1, -1) if word_indices[i] is not None), None)
    if last_matched is not None and last_matched == len(word_indices) - 1:
        last_end = word_indices[last_matched][1]
        last_ayah = chapter_ref.words[last_end].ayah
        # Find the last word index that belongs to the same verse
        verse_end = last_end
        while verse_end + 1 < chapter_ref.num_words and chapter_ref.words[verse_end + 1].ayah == last_ayah:
            verse_end += 1
        if last_end < verse_end:
            gap_segments.add(last_matched)
            gap_count = verse_end - last_end
            print(f" [GAP] {gap_count} word(s) missing after last segment {last_matched + 1}")
            _dc = get_debug_collector()
            if _dc is not None:
                _dc.add_event("gap", position="after_last",
                              segment_idx=last_matched + 1, missing_words=gap_count)

    # Build profiling dict
    if PHONEME_ALIGNMENT_PROFILING:
        total_time = time.perf_counter() - total_start
        profiling = {
            "total_time": total_time,
            "ref_build_time": ref_build_time,
            "dp_total_time": sum(dp_times),
            "dp_min_time": min(dp_times) if dp_times else 0.0,
            "dp_max_time": max(dp_times) if dp_times else 0.0,
            "window_setup_time": window_setup_total,
            "result_build_time": result_build_total,
            "num_segments": num_segments,
            "tier1_attempts": tier1_attempts,
            "tier1_passed": tier1_passed,
            "tier1_segments": tier1_segments,
            "tier2_attempts": tier2_attempts,
            "tier2_passed": tier2_passed,
            "tier2_segments": tier2_segments,
            "consec_reanchors": consec_reanchors,
            "segments_attempted": segments_attempted,
            "segments_passed": segments_passed,
            "special_merges": special_merges,
            "transition_skips": transition_skips,
            "phoneme_wraps_detected": len(repetition_segments),
        }
    else:
        profiling = {
            "num_segments": num_segments,
            "tier1_attempts": tier1_attempts,
            "tier1_passed": tier1_passed,
            "tier1_segments": tier1_segments,
            "tier2_attempts": tier2_attempts,
            "tier2_passed": tier2_passed,
            "tier2_segments": tier2_segments,
            "consec_reanchors": consec_reanchors,
            "segments_attempted": segments_attempted,
            "segments_passed": segments_passed,
            "special_merges": special_merges,
            "transition_skips": transition_skips,
            "phoneme_wraps_detected": len(repetition_segments),
        }

    return results, profiling, gap_segments, merged_into, repetition_segments