"""Orchestration for phoneme-based alignment and retries.""" from typing import List, Tuple from config import ( ANCHOR_SEGMENTS, MAX_CONSECUTIVE_FAILURES, RETRY_LOOKBACK_WORDS, RETRY_LOOKAHEAD_WORDS, MAX_EDIT_DISTANCE_RELAXED, PHONEME_ALIGNMENT_PROFILING, ) from src.core.debug_collector import get_debug_collector def _debug_alignment_result(alignment, chapter_ref): """Extract JSON-safe dict from an AlignmentResult for the debug collector.""" if alignment is None: return None return { "matched_ref": alignment.matched_ref, "start_word_idx": alignment.start_word_idx, "end_word_idx": alignment.end_word_idx, "edit_cost": round(alignment.edit_cost, 4), "confidence": round(alignment.confidence, 4), "j_start": alignment.j_start, "best_j": alignment.best_j, "basmala_consumed": alignment.basmala_consumed, "n_wraps": alignment.n_wraps, "wrap_points": alignment.wrap_points, "wrap_word_ranges": alignment.wrap_word_ranges, } def run_phoneme_matching( phoneme_texts: List[List[str]], detected_surah: int, first_quran_idx: int = 0, special_results: List[tuple] = None, start_pointer: int = 0, ) -> Tuple[List[tuple], dict, set, dict, set]: """ Phoneme-based segment matching using substring DP. Args: phoneme_texts: List of phoneme lists (each is a list of phoneme strings) detected_surah: Surah number from anchor search first_quran_idx: Index where Quran segments start (after specials) special_results: Results for special segments (Isti'adha/Basmala) start_pointer: Initial word pointer from anchor voting Returns: (results, profiling_dict, gap_segments, merged_into, repetition_segments) results: List[(matched_text, score, matched_ref, wrap_word_ranges_or_None), ...] merged_into: dict mapping consumed segment indices to their target segment index repetition_segments: set of segment indices where wraps were detected """ from .phoneme_matcher import align_segment, get_matched_text from .phoneme_matcher_cache import get_chapter_reference from .phoneme_anchor import verse_to_word_index, find_anchor_by_voting from .ngram_index import get_ngram_index # Only import time if profiling enabled if PHONEME_ALIGNMENT_PROFILING: import time total_start = time.perf_counter() ref_build_start = time.perf_counter() # Build/get cached chapter reference (includes phonemizer call if not cached) chapter_ref = get_chapter_reference(detected_surah) if PHONEME_ALIGNMENT_PROFILING: ref_build_time = time.perf_counter() - ref_build_start # Initialize results with special segments results = list(special_results) if special_results else [] # Parallel list: None for specials/failures, (start_word_idx, end_word_idx) for matches word_indices = [None] * len(results) # Timing accumulators (only used if profiling enabled) if PHONEME_ALIGNMENT_PROFILING: dp_times = [] window_setup_total = 0.0 result_build_total = 0.0 # Track whether the next segment might have Basmala fused with verse content from .special_segments import ( SPECIAL_PHONEMES, SPECIAL_TEXT, TRANSITION_TEXT, detect_transition_segment, detect_inter_chapter_specials, ) basmala_already_detected = any( r[2] in ("Basmala", "Isti'adha+Basmala") for r in (special_results or []) ) is_first_after_transition = not basmala_already_detected special_merges = 0 # Transition segment state transition_mode = False transition_skips = 0 tahmeed_merge_skip = 0 merged_into = {} # {consumed_idx: target_idx} # Gap and repetition tracking gap_segments = set() repetition_segments = set() transition_expected_pointer = -1 # -1 = no pending check def _check_transition_gap(start_word_idx): """Flag missing words at start of new chapter after transition.""" nonlocal transition_expected_pointer if transition_expected_pointer < 0: return if start_word_idx > transition_expected_pointer: seg_idx = len(word_indices) - 1 gap_segments.add(seg_idx) gap = start_word_idx - transition_expected_pointer print(f" [GAP] {gap} word(s) missing at start of chapter after transition " f"(expected word {transition_expected_pointer}, got {start_word_idx})") transition_expected_pointer = -1 # Process Quran segments with phoneme alignment pointer = start_pointer num_segments = 0 consecutive_failures = 0 skip_count = 0 pending_specials = [] tier1_attempts = 0 tier1_passed = 0 tier1_segments = [] tier2_attempts = 0 tier2_passed = 0 tier2_segments = [] consec_reanchors = 0 segments_attempted = 0 segments_passed = 0 for i, asr_phonemes in enumerate(phoneme_texts[first_quran_idx:]): # Handle segments consumed by inter-chapter special detection if skip_count > 0: results.append(pending_specials.pop(0)) word_indices.append(None) skip_count -= 1 continue # Handle segments consumed by Tahmeed merge (sami'a + rabbana in separate segments) if tahmeed_merge_skip > 0: # This segment's audio was merged into the previous Tahmeed segment results.append(("", 0.0, "", None)) word_indices.append(None) tahmeed_merge_skip -= 1 transition_skips += 1 continue segment_idx = first_quran_idx + i + 1 # 1-indexed for display segments_attempted += 1 # Transition mode: keep checking for transitions before trying alignment if transition_mode: trans_name, trans_conf = detect_transition_segment(asr_phonemes) if trans_name: print(f" [TRANSITION-MODE] Segment {segment_idx}: {trans_name} (conf={trans_conf:.2f})") _dc = get_debug_collector() if _dc is not None: _dc.add_event("transition_detected", segment_idx=segment_idx, transition_type=trans_name, confidence=round(trans_conf, 4), context="transition_mode") results.append((TRANSITION_TEXT[trans_name], trans_conf, trans_name, None)) word_indices.append(None) transition_skips += 1 # Tahmeed peek-ahead for merge if trans_name == "Tahmeed": next_abs = first_quran_idx + i + 1 if next_abs < len(phoneme_texts) and phoneme_texts[next_abs]: resp_name, resp_conf = detect_transition_segment( phoneme_texts[next_abs], allowed={"Tahmeed"}) if resp_name: merged_into[next_abs] = first_quran_idx + i tahmeed_merge_skip = 1 print(f" [TAHMEED-MERGE] Next segment merged into Tahmeed") if _dc is not None: _dc.add_event("tahmeed_merge", segment_idx=segment_idx, merged_segment=next_abs) continue else: # Exit transition mode, global reanchor transition_mode = False print(f" [TRANSITION-MODE] Exiting at segment {segment_idx}, running global reanchor...") remaining_idx = first_quran_idx + i remaining_texts = phoneme_texts[remaining_idx:] if remaining_texts: reanchor_surah, reanchor_ayah = find_anchor_by_voting( remaining_texts, get_ngram_index(), ANCHOR_SEGMENTS, ) if reanchor_surah > 0: if reanchor_surah != detected_surah: detected_surah = reanchor_surah chapter_ref = get_chapter_reference(detected_surah) pointer = verse_to_word_index(chapter_ref, reanchor_ayah) transition_expected_pointer = pointer print(f" [GLOBAL-REANCHOR] Jumped to Surah {detected_surah}, " f"Ayah {reanchor_ayah}, word {pointer}") _dc = get_debug_collector() if _dc is not None: _dc.add_event("reanchor", at_segment=segment_idx, reason="transition_mode_exit", new_surah=detected_surah, new_ayah=reanchor_ayah, new_pointer=pointer) consecutive_failures = 0 # Fall through to normal alignment below alignment, timing = align_segment(asr_phonemes, chapter_ref, pointer, segment_idx) num_segments += 1 # Accumulate timing if profiling enabled if PHONEME_ALIGNMENT_PROFILING: dp_times.append(timing['dp_time']) window_setup_total += timing['window_setup_time'] result_build_total += timing['result_build_time'] # Debug collector: primary alignment attempt _dc = get_debug_collector() if _dc is not None: _dc.add_alignment_result( segment_idx, asr_phonemes, window={"pointer": pointer, "surah": detected_surah}, expected_pointer=pointer, result=_debug_alignment_result(alignment, chapter_ref), timing=timing, ) # Chapter transition: pointer past end of chapter if alignment is None and pointer >= chapter_ref.num_words: remaining_phonemes = phoneme_texts[first_quran_idx + i:] amin_consumed = 0 if chapter_ref.surah == 1: # Check for Amin after Al-Fatiha before inter-chapter specials amin_name, amin_conf = detect_transition_segment( asr_phonemes, allowed={"Amin"}) if amin_name: print(f" [AMIN] Detected after Surah 1 (conf={amin_conf:.2f})") results.append((TRANSITION_TEXT["Amin"], amin_conf, "Amin", None)) word_indices.append(None) transition_skips += 1 amin_consumed = 1 # Re-slice remaining phonemes to start after Amin remaining_phonemes = phoneme_texts[first_quran_idx + i + 1:] inter_specials, num_consumed = detect_inter_chapter_specials(remaining_phonemes) if chapter_ref.surah == 1: # After Al-Fatiha, the next chapter could be anything — global reanchor print(f" [CHAPTER-END] Surah 1 complete at segment {segment_idx}, " f"running global reanchor...") _dc = get_debug_collector() if _dc is not None: _dc.add_event("chapter_end", at_segment=segment_idx, from_surah=1, next_action="global_reanchor") # Use segments after Amin + specials for anchor voting anchor_offset = first_quran_idx + i + amin_consumed + num_consumed anchor_remaining = phoneme_texts[anchor_offset:] reanchor_surah, reanchor_ayah = find_anchor_by_voting( anchor_remaining, get_ngram_index(), ANCHOR_SEGMENTS, ) if reanchor_surah > 0: next_surah = reanchor_surah chapter_ref = get_chapter_reference(next_surah) pointer = verse_to_word_index(chapter_ref, reanchor_ayah) # Don't set transition_expected_pointer — after Surah 1 the next # chapter is arbitrary (global reanchor), so gaps are expected. print(f" [GLOBAL-REANCHOR] Anchored to Surah {next_surah}, " f"Ayah {reanchor_ayah}, word {pointer}") else: # Fallback: assume chapter 2 next_surah = 2 chapter_ref = get_chapter_reference(next_surah) pointer = 0 print(f" [GLOBAL-REANCHOR] No anchor found, falling back to Surah 2") else: next_surah = chapter_ref.surah + 1 if next_surah > 114: pass # No more chapters — fall through to failure handling else: # Check for transition before committing to next sequential surah if num_consumed == 0: trans_name, trans_conf = detect_transition_segment(asr_phonemes) if trans_name: print(f" [CHAPTER-END-TRANSITION] Segment {segment_idx}: {trans_name} " f"at end of Surah {chapter_ref.surah} (conf={trans_conf:.2f})") results.append((TRANSITION_TEXT[trans_name], trans_conf, trans_name, None)) word_indices.append(None) transition_skips += 1 transition_mode = True detected_surah = next_surah chapter_ref = get_chapter_reference(next_surah) pointer = 0 transition_expected_pointer = 0 consecutive_failures = 0 continue print(f" [CHAPTER-END] Surah {chapter_ref.surah} complete at segment {segment_idx}, " f"transitioning to Surah {next_surah}") _dc = get_debug_collector() if _dc is not None: _dc.add_event("chapter_transition", at_segment=segment_idx, from_surah=chapter_ref.surah, to_surah=next_surah) chapter_ref = get_chapter_reference(next_surah) pointer = 0 transition_expected_pointer = 0 if next_surah <= 114: detected_surah = next_surah consecutive_failures = 0 if amin_consumed > 0: # Current segment was Amin (already appended above). # Queue inter-chapter specials for subsequent segments. has_basmala = any(s[2] in ("Basmala", "Isti'adha+Basmala") for s in inter_specials) is_first_after_transition = not has_basmala if num_consumed > 0: pending_specials = list(inter_specials) skip_count = num_consumed else: is_first_after_transition = True continue if num_consumed > 0: has_basmala = any(s[2] in ("Basmala", "Isti'adha+Basmala") for s in inter_specials) is_first_after_transition = not has_basmala # Current segment is a special — append its result results.append(inter_specials[0]) word_indices.append(None) # Queue remaining specials for subsequent segments if num_consumed > 1: pending_specials = list(inter_specials[1:]) skip_count = num_consumed - 1 continue else: is_first_after_transition = True # No specials — re-try alignment on this segment against the new chapter alignment, timing = align_segment(asr_phonemes, chapter_ref, pointer, segment_idx) num_segments += 1 if PHONEME_ALIGNMENT_PROFILING: dp_times.append(timing['dp_time']) window_setup_total += timing['window_setup_time'] result_build_total += timing['result_build_time'] # Fall through to existing if/else below # Basmala-fused retry: if this is the first segment after a transition # and Basmala wasn't detected, the reciter may have merged Basmala with # the first verse. Always try prepending Basmala phonemes to R and pick # the better result (even if the plain alignment already succeeded). if is_first_after_transition: is_first_after_transition = False basmala_alignment, basmala_timing = align_segment( asr_phonemes, chapter_ref, pointer, segment_idx, basmala_prefix=True) num_segments += 1 if PHONEME_ALIGNMENT_PROFILING: dp_times.append(basmala_timing['dp_time']) window_setup_total += basmala_timing['window_setup_time'] result_build_total += basmala_timing['result_build_time'] if basmala_alignment and basmala_alignment.basmala_consumed: existing_conf = alignment.confidence if alignment else 0.0 if basmala_alignment.confidence > existing_conf: matched_text = SPECIAL_TEXT["Basmala"] + " " + get_matched_text(chapter_ref, basmala_alignment) result = (matched_text, basmala_alignment.confidence, basmala_alignment.matched_ref, basmala_alignment.wrap_word_ranges) pointer = basmala_alignment.end_word_idx + 1 consecutive_failures = 0 word_indices.append((basmala_alignment.start_word_idx, basmala_alignment.end_word_idx)) _check_transition_gap(basmala_alignment.start_word_idx) if basmala_alignment.n_wraps > 0: repetition_segments.add(len(results)) results.append(result) special_merges += 1 segments_passed += 1 print(f" [BASMALA-FUSED] Segment {segment_idx}: Basmala merged with verse " f"(fused conf={basmala_alignment.confidence:.2f} > plain conf={existing_conf:.2f})") _dc = get_debug_collector() if _dc is not None: _dc.add_event("basmala_fused", segment_idx=segment_idx, fused_conf=round(basmala_alignment.confidence, 4), plain_conf=round(existing_conf, 4), chose="fused") continue # Basmala-fused didn't win — fall through with original alignment if alignment: is_first_after_transition = False matched_text = get_matched_text(chapter_ref, alignment) result = (matched_text, alignment.confidence, alignment.matched_ref, alignment.wrap_word_ranges) pointer = alignment.end_word_idx + 1 # Advance pointer consecutive_failures = 0 word_indices.append((alignment.start_word_idx, alignment.end_word_idx)) _check_transition_gap(alignment.start_word_idx) if alignment.n_wraps > 0: repetition_segments.add(len(results)) segments_passed += 1 else: # === Check for transition segment before retry tiers === trans_name, trans_conf = detect_transition_segment(asr_phonemes) if trans_name: print(f" [TRANSITION] Segment {segment_idx}: {trans_name} (conf={trans_conf:.2f})") _dc = get_debug_collector() if _dc is not None: _dc.add_event("transition_detected", segment_idx=segment_idx, transition_type=trans_name, confidence=round(trans_conf, 4), context="pre_retry") result = (TRANSITION_TEXT[trans_name], trans_conf, trans_name, None) word_indices.append(None) transition_skips += 1 transition_mode = True # Tahmeed peek-ahead for merge if trans_name == "Tahmeed": next_abs = first_quran_idx + i + 1 if next_abs < len(phoneme_texts) and phoneme_texts[next_abs]: resp_name, resp_conf = detect_transition_segment( phoneme_texts[next_abs], allowed={"Tahmeed"}) if resp_name: merged_into[next_abs] = first_quran_idx + i tahmeed_merge_skip = 1 print(f" [TAHMEED-MERGE] Next segment merged into Tahmeed") if _dc is not None: _dc.add_event("tahmeed_merge", segment_idx=segment_idx, merged_segment=next_abs) results.append(result) continue # === Graduated retry === # Tier 1: expanded window, same threshold tier1_attempts += 1 tier1_segments.append(segment_idx) alignment, timing = align_segment( asr_phonemes, chapter_ref, pointer, segment_idx, lookback_override=RETRY_LOOKBACK_WORDS, lookahead_override=RETRY_LOOKAHEAD_WORDS, ) num_segments += 1 if PHONEME_ALIGNMENT_PROFILING: dp_times.append(timing['dp_time']) window_setup_total += timing['window_setup_time'] result_build_total += timing['result_build_time'] # Tier 2: expanded window + relaxed threshold tier2_entered = False if alignment is None: tier2_entered = True tier2_attempts += 1 tier2_segments.append(segment_idx) alignment, timing = align_segment( asr_phonemes, chapter_ref, pointer, segment_idx, lookback_override=RETRY_LOOKBACK_WORDS, lookahead_override=RETRY_LOOKAHEAD_WORDS, max_edit_distance_override=MAX_EDIT_DISTANCE_RELAXED, ) num_segments += 1 if PHONEME_ALIGNMENT_PROFILING: dp_times.append(timing['dp_time']) window_setup_total += timing['window_setup_time'] result_build_total += timing['result_build_time'] if alignment: # Retry succeeded is_first_after_transition = False matched_text = get_matched_text(chapter_ref, alignment) result = (matched_text, alignment.confidence, alignment.matched_ref, alignment.wrap_word_ranges) pointer = alignment.end_word_idx + 1 consecutive_failures = 0 word_indices.append((alignment.start_word_idx, alignment.end_word_idx)) _check_transition_gap(alignment.start_word_idx) if alignment.n_wraps > 0: repetition_segments.add(len(results)) segments_passed += 1 tier_name = "tier2" if tier2_entered else "tier1" if tier2_entered: tier2_passed += 1 else: tier1_passed += 1 print(f" [RETRY-OK] Segment {segment_idx}: recovered via expanded window/relaxed threshold") _dc = get_debug_collector() if _dc is not None: _dc.add_alignment_result( segment_idx, asr_phonemes, window={"pointer": pointer - 1, "surah": detected_surah}, expected_pointer=pointer - 1, result=_debug_alignment_result(alignment, chapter_ref), timing=timing, retry_tier=tier_name, ) _dc.add_event(f"retry_{tier_name}", segment_idx=segment_idx, passed=True, confidence=round(alignment.confidence, 4)) else: # Real failure after all retries result = ("", 0.0, "", None) consecutive_failures += 1 word_indices.append(None) _dc = get_debug_collector() if _dc is not None: _dc.add_event("retry_failed", segment_idx=segment_idx, tier1=True, tier2=tier2_entered) if consecutive_failures >= MAX_CONSECUTIVE_FAILURES: consec_reanchors += 1 # Global re-anchor (not constrained to current surah) remaining_idx = first_quran_idx + i + 1 remaining_texts = phoneme_texts[remaining_idx:] if remaining_texts: reanchor_surah, reanchor_ayah = find_anchor_by_voting( remaining_texts, get_ngram_index(), ANCHOR_SEGMENTS, ) if reanchor_surah > 0: if reanchor_surah != detected_surah: detected_surah = reanchor_surah chapter_ref = get_chapter_reference(detected_surah) pointer = verse_to_word_index(chapter_ref, reanchor_ayah) transition_expected_pointer = pointer print(f" [GLOBAL-REANCHOR] Jumped to Surah {detected_surah}, " f"Ayah {reanchor_ayah}, word {pointer}") _dc = get_debug_collector() if _dc is not None: _dc.add_event("reanchor", at_segment=segment_idx, reason="consecutive_failures", new_surah=detected_surah, new_ayah=reanchor_ayah, new_pointer=pointer) consecutive_failures = 0 results.append(result) # Post-processing: detect consecutive segments with reference gaps # (gap_segments may already have entries from chapter-transition checks above) prev_matched_idx = None for idx in range(len(results)): if word_indices[idx] is None: continue if prev_matched_idx is not None: # Skip gap check across chapter transitions — word indices are per-chapter prev_ref = results[prev_matched_idx][2] curr_ref = results[idx][2] prev_surah = prev_ref.split(":")[0] if prev_ref and ":" in prev_ref else None curr_surah = curr_ref.split(":")[0] if curr_ref and ":" in curr_ref else None if prev_surah is not None and prev_surah == curr_surah: prev_end = word_indices[prev_matched_idx][1] curr_start = word_indices[idx][0] gap = curr_start - prev_end - 1 if gap > 0: gap_segments.add(prev_matched_idx) gap_segments.add(idx) print(f" [GAP] {gap} word(s) missing between segments " f"{prev_matched_idx + 1} and {idx + 1}") _dc = get_debug_collector() if _dc is not None: _dc.add_event("gap", position="between", segment_before=prev_matched_idx + 1, segment_after=idx + 1, missing_words=gap) prev_matched_idx = idx # Edge case: missing words at start of expected range first_matched = next((i for i, w in enumerate(word_indices) if w is not None), None) if first_matched is not None: first_start = word_indices[first_matched][0] if first_start > start_pointer: gap_segments.add(first_matched) gap_count = first_start - start_pointer print(f" [GAP] {gap_count} word(s) missing before first segment {first_matched + 1}") _dc = get_debug_collector() if _dc is not None: _dc.add_event("gap", position="before_first", segment_idx=first_matched + 1, missing_words=gap_count) # Edge case: missing words at end of current verse # Only flag if the last matched segment is also the final segment overall. # If there are trailing no-match segments after it, those account for the # remaining audio — the words aren't missing, they just failed to align. # Compare against the verse boundary (not chapter end), since a recitation # doesn't necessarily cover the entire chapter. last_matched = next((i for i in range(len(word_indices) - 1, -1, -1) if word_indices[i] is not None), None) if last_matched is not None and last_matched == len(word_indices) - 1: last_end = word_indices[last_matched][1] last_ayah = chapter_ref.words[last_end].ayah # Find the last word index that belongs to the same verse verse_end = last_end while verse_end + 1 < chapter_ref.num_words and chapter_ref.words[verse_end + 1].ayah == last_ayah: verse_end += 1 if last_end < verse_end: gap_segments.add(last_matched) gap_count = verse_end - last_end print(f" [GAP] {gap_count} word(s) missing after last segment {last_matched + 1}") _dc = get_debug_collector() if _dc is not None: _dc.add_event("gap", position="after_last", segment_idx=last_matched + 1, missing_words=gap_count) # Build profiling dict if PHONEME_ALIGNMENT_PROFILING: total_time = time.perf_counter() - total_start profiling = { "total_time": total_time, "ref_build_time": ref_build_time, "dp_total_time": sum(dp_times), "dp_min_time": min(dp_times) if dp_times else 0.0, "dp_max_time": max(dp_times) if dp_times else 0.0, "window_setup_time": window_setup_total, "result_build_time": result_build_total, "num_segments": num_segments, "tier1_attempts": tier1_attempts, "tier1_passed": tier1_passed, "tier1_segments": tier1_segments, "tier2_attempts": tier2_attempts, "tier2_passed": tier2_passed, "tier2_segments": tier2_segments, "consec_reanchors": consec_reanchors, "segments_attempted": segments_attempted, "segments_passed": segments_passed, "special_merges": special_merges, "transition_skips": transition_skips, "phoneme_wraps_detected": len(repetition_segments), } else: profiling = { "num_segments": num_segments, "tier1_attempts": tier1_attempts, "tier1_passed": tier1_passed, "tier1_segments": tier1_segments, "tier2_attempts": tier2_attempts, "tier2_passed": tier2_passed, "tier2_segments": tier2_segments, "consec_reanchors": consec_reanchors, "segments_attempted": segments_attempted, "segments_passed": segments_passed, "special_merges": special_merges, "transition_skips": transition_skips, "phoneme_wraps_detected": len(repetition_segments), } return results, profiling, gap_segments, merged_into, repetition_segments