# Source path: quranic-universal-aligner/src/alignment/alignment_pipeline.py
# Hosting-page residue kept for provenance: "hetchyy's picture",
# "Upload folder using huggingface_hub", "602b5d3 verified"
"""Orchestration for phoneme-based alignment and retries."""
from typing import List, Tuple
from config import (
ANCHOR_SEGMENTS,
MAX_CONSECUTIVE_FAILURES,
RETRY_LOOKBACK_WORDS,
RETRY_LOOKAHEAD_WORDS,
MAX_EDIT_DISTANCE_RELAXED,
PHONEME_ALIGNMENT_PROFILING,
)
from src.core.debug_collector import get_debug_collector
def _debug_alignment_result(alignment, chapter_ref):
    """Summarise an AlignmentResult as a plain dict for the debug collector.

    Args:
        alignment: Alignment result object, or None when alignment failed.
        chapter_ref: Chapter reference of the alignment. Not read by this
            function.

    Returns:
        None when ``alignment`` is None; otherwise a dict holding the
        result's fields, with ``edit_cost`` and ``confidence`` rounded to
        4 decimal places.
    """
    if alignment is None:
        return None

    leading_fields = ("matched_ref", "start_word_idx", "end_word_idx")
    rounded_fields = ("edit_cost", "confidence")
    trailing_fields = (
        "j_start",
        "best_j",
        "basmala_consumed",
        "n_wraps",
        "wrap_points",
        "wrap_word_ranges",
    )

    # Key order is kept stable: positional fields, rounded scores, DP details.
    snapshot = {field: getattr(alignment, field) for field in leading_fields}
    for field in rounded_fields:
        snapshot[field] = round(getattr(alignment, field), 4)
    for field in trailing_fields:
        snapshot[field] = getattr(alignment, field)
    return snapshot
def run_phoneme_matching(
phoneme_texts: List[List[str]],
detected_surah: int,
first_quran_idx: int = 0,
special_results: List[tuple] = None,
start_pointer: int = 0,
) -> Tuple[List[tuple], dict, set, dict, set]:
"""
Phoneme-based segment matching using substring DP.

Walks the segments from ``first_quran_idx`` onward. Each segment is aligned
against the current chapter reference at the current word pointer. When that
fails, the function checks for a chapter end, then for a transition segment,
then retries with an expanded window and finally a relaxed edit-distance
threshold. After MAX_CONSECUTIVE_FAILURES consecutive failures it re-anchors
globally via anchor voting. A post-processing pass then flags gaps in the
matched word ranges.

Args:
    phoneme_texts: List of phoneme lists (each is a list of phoneme strings)
    detected_surah: Surah number from anchor search
    first_quran_idx: Index where Quran segments start (after specials)
    special_results: Results for special segments (Isti'adha/Basmala);
        copied in as the leading entries of ``results``
    start_pointer: Initial word pointer from anchor voting

Returns:
    (results, profiling_dict, gap_segments, merged_into, repetition_segments)
    results: List[(matched_text, score, matched_ref, wrap_word_ranges_or_None), ...]
        Failed segments are recorded as ("", 0.0, "", None).
    profiling_dict: counters (attempts/passes per retry tier, reanchors,
        merges, skips, wraps); timing entries are added only when
        PHONEME_ALIGNMENT_PROFILING is set
    gap_segments: set of indices into ``results`` flagged for missing words
    merged_into: dict mapping consumed segment indices to their target segment index
    repetition_segments: set of segment indices where wraps were detected
"""
# Function-scope imports of the sibling alignment modules.
from .phoneme_matcher import align_segment, get_matched_text
from .phoneme_matcher_cache import get_chapter_reference
from .phoneme_anchor import verse_to_word_index, find_anchor_by_voting
from .ngram_index import get_ngram_index
# Only import time if profiling enabled
if PHONEME_ALIGNMENT_PROFILING:
import time
total_start = time.perf_counter()
ref_build_start = time.perf_counter()
# Build/get cached chapter reference (includes phonemizer call if not cached)
chapter_ref = get_chapter_reference(detected_surah)
if PHONEME_ALIGNMENT_PROFILING:
ref_build_time = time.perf_counter() - ref_build_start
# Initialize results with special segments
results = list(special_results) if special_results else []
# Parallel list: None for specials/failures, (start_word_idx, end_word_idx) for matches
word_indices = [None] * len(results)
# Timing accumulators (only used if profiling enabled)
if PHONEME_ALIGNMENT_PROFILING:
dp_times = []
window_setup_total = 0.0
result_build_total = 0.0
# Track whether the next segment might have Basmala fused with verse content
from .special_segments import (
SPECIAL_PHONEMES, SPECIAL_TEXT, TRANSITION_TEXT,
detect_transition_segment, detect_inter_chapter_specials,
)
# Slot [2] of a result tuple is matched_ref; special segments carry their
# name there, which is what is compared against below.
basmala_already_detected = any(
r[2] in ("Basmala", "Isti'adha+Basmala") for r in (special_results or [])
)
# If no Basmala was seen among the specials, the first Quran segment gets the
# Basmala-fused retry further down.
is_first_after_transition = not basmala_already_detected
special_merges = 0
# Transition segment state
transition_mode = False
transition_skips = 0
tahmeed_merge_skip = 0
merged_into = {} # {consumed_idx: target_idx}
# Gap and repetition tracking
gap_segments = set()
repetition_segments = set()
transition_expected_pointer = -1 # -1 = no pending check
def _check_transition_gap(start_word_idx):
"""Flag missing words at start of new chapter after transition."""
nonlocal transition_expected_pointer
if transition_expected_pointer < 0:
return
if start_word_idx > transition_expected_pointer:
# Callers append to word_indices before calling, so this is the index of
# the segment that was just matched.
seg_idx = len(word_indices) - 1
gap_segments.add(seg_idx)
gap = start_word_idx - transition_expected_pointer
print(f" [GAP] {gap} word(s) missing at start of chapter after transition "
f"(expected word {transition_expected_pointer}, got {start_word_idx})")
# Reset to the "no pending check" sentinel.
transition_expected_pointer = -1
# Process Quran segments with phoneme alignment
pointer = start_pointer
num_segments = 0
consecutive_failures = 0
# skip_count / pending_specials: inter-chapter specials detected ahead of time,
# emitted one per upcoming segment by the loop below.
skip_count = 0
pending_specials = []
# Retry-tier and pass/fail counters; all are reported in the profiling dict.
tier1_attempts = 0
tier1_passed = 0
tier1_segments = []
tier2_attempts = 0
tier2_passed = 0
tier2_segments = []
consec_reanchors = 0
segments_attempted = 0
segments_passed = 0
# i is relative to first_quran_idx; the absolute index into phoneme_texts is
# first_quran_idx + i.
for i, asr_phonemes in enumerate(phoneme_texts[first_quran_idx:]):
# Handle segments consumed by inter-chapter special detection
if skip_count > 0:
results.append(pending_specials.pop(0))
word_indices.append(None)
skip_count -= 1
continue
# Handle segments consumed by Tahmeed merge (sami'a + rabbana in separate segments)
if tahmeed_merge_skip > 0:
# This segment's audio was merged into the previous Tahmeed segment
results.append(("", 0.0, "", None))
word_indices.append(None)
tahmeed_merge_skip -= 1
transition_skips += 1
continue
segment_idx = first_quran_idx + i + 1 # 1-indexed for display
segments_attempted += 1
# Transition mode: keep checking for transitions before trying alignment
if transition_mode:
trans_name, trans_conf = detect_transition_segment(asr_phonemes)
if trans_name:
print(f" [TRANSITION-MODE] Segment {segment_idx}: {trans_name} (conf={trans_conf:.2f})")
_dc = get_debug_collector()
if _dc is not None:
_dc.add_event("transition_detected", segment_idx=segment_idx,
transition_type=trans_name, confidence=round(trans_conf, 4),
context="transition_mode")
results.append((TRANSITION_TEXT[trans_name], trans_conf, trans_name, None))
word_indices.append(None)
transition_skips += 1
# Tahmeed peek-ahead for merge
if trans_name == "Tahmeed":
# Absolute index (into phoneme_texts) of the following segment.
next_abs = first_quran_idx + i + 1
if next_abs < len(phoneme_texts) and phoneme_texts[next_abs]:
resp_name, resp_conf = detect_transition_segment(
phoneme_texts[next_abs], allowed={"Tahmeed"})
if resp_name:
# Record next segment -> current segment, and skip it next iteration.
merged_into[next_abs] = first_quran_idx + i
tahmeed_merge_skip = 1
print(f" [TAHMEED-MERGE] Next segment merged into Tahmeed")
if _dc is not None:
_dc.add_event("tahmeed_merge", segment_idx=segment_idx,
merged_segment=next_abs)
continue
else:
# Exit transition mode, global reanchor
transition_mode = False
print(f" [TRANSITION-MODE] Exiting at segment {segment_idx}, running global reanchor...")
# Voting includes the current segment (slice starts at first_quran_idx + i).
remaining_idx = first_quran_idx + i
remaining_texts = phoneme_texts[remaining_idx:]
if remaining_texts:
reanchor_surah, reanchor_ayah = find_anchor_by_voting(
remaining_texts, get_ngram_index(), ANCHOR_SEGMENTS,
)
# A surah number > 0 is treated as "anchor found".
if reanchor_surah > 0:
if reanchor_surah != detected_surah:
detected_surah = reanchor_surah
chapter_ref = get_chapter_reference(detected_surah)
pointer = verse_to_word_index(chapter_ref, reanchor_ayah)
transition_expected_pointer = pointer
print(f" [GLOBAL-REANCHOR] Jumped to Surah {detected_surah}, "
f"Ayah {reanchor_ayah}, word {pointer}")
_dc = get_debug_collector()
if _dc is not None:
_dc.add_event("reanchor", at_segment=segment_idx,
reason="transition_mode_exit",
new_surah=detected_surah,
new_ayah=reanchor_ayah, new_pointer=pointer)
consecutive_failures = 0
# Fall through to normal alignment below
# Primary alignment attempt at the current pointer with default settings.
alignment, timing = align_segment(asr_phonemes, chapter_ref, pointer, segment_idx)
num_segments += 1
# Accumulate timing if profiling enabled
if PHONEME_ALIGNMENT_PROFILING:
dp_times.append(timing['dp_time'])
window_setup_total += timing['window_setup_time']
result_build_total += timing['result_build_time']
# Debug collector: primary alignment attempt
_dc = get_debug_collector()
if _dc is not None:
_dc.add_alignment_result(
segment_idx, asr_phonemes,
window={"pointer": pointer, "surah": detected_surah},
expected_pointer=pointer,
result=_debug_alignment_result(alignment, chapter_ref),
timing=timing,
)
# Chapter transition: pointer past end of chapter
if alignment is None and pointer >= chapter_ref.num_words:
# Segments from the current one onward, used to look for inter-chapter specials.
remaining_phonemes = phoneme_texts[first_quran_idx + i:]
amin_consumed = 0
if chapter_ref.surah == 1:
# Check for Amin after Al-Fatiha before inter-chapter specials
amin_name, amin_conf = detect_transition_segment(
asr_phonemes, allowed={"Amin"})
if amin_name:
print(f" [AMIN] Detected after Surah 1 (conf={amin_conf:.2f})")
results.append((TRANSITION_TEXT["Amin"], amin_conf, "Amin", None))
word_indices.append(None)
transition_skips += 1
amin_consumed = 1
# Re-slice remaining phonemes to start after Amin
remaining_phonemes = phoneme_texts[first_quran_idx + i + 1:]
# inter_specials: result tuples for the detected specials; num_consumed: how
# many leading segments of remaining_phonemes they account for.
inter_specials, num_consumed = detect_inter_chapter_specials(remaining_phonemes)
if chapter_ref.surah == 1:
# After Al-Fatiha, the next chapter could be anything — global reanchor
print(f" [CHAPTER-END] Surah 1 complete at segment {segment_idx}, "
f"running global reanchor...")
_dc = get_debug_collector()
if _dc is not None:
_dc.add_event("chapter_end", at_segment=segment_idx,
from_surah=1, next_action="global_reanchor")
# Use segments after Amin + specials for anchor voting
anchor_offset = first_quran_idx + i + amin_consumed + num_consumed
anchor_remaining = phoneme_texts[anchor_offset:]
# NOTE(review): unlike the other two find_anchor_by_voting call sites in this
# function, this one does not check that the slice is non-empty first —
# confirm find_anchor_by_voting handles an empty list.
reanchor_surah, reanchor_ayah = find_anchor_by_voting(
anchor_remaining, get_ngram_index(), ANCHOR_SEGMENTS,
)
if reanchor_surah > 0:
next_surah = reanchor_surah
chapter_ref = get_chapter_reference(next_surah)
pointer = verse_to_word_index(chapter_ref, reanchor_ayah)
# Don't set transition_expected_pointer — after Surah 1 the next
# chapter is arbitrary (global reanchor), so gaps are expected.
print(f" [GLOBAL-REANCHOR] Anchored to Surah {next_surah}, "
f"Ayah {reanchor_ayah}, word {pointer}")
else:
# Fallback: assume chapter 2
next_surah = 2
chapter_ref = get_chapter_reference(next_surah)
pointer = 0
print(f" [GLOBAL-REANCHOR] No anchor found, falling back to Surah 2")
else:
# Any other chapter: assume the recitation continues with the next surah.
next_surah = chapter_ref.surah + 1
if next_surah > 114:
pass # No more chapters — fall through to failure handling
else:
# Check for transition before committing to next sequential surah
if num_consumed == 0:
trans_name, trans_conf = detect_transition_segment(asr_phonemes)
if trans_name:
print(f" [CHAPTER-END-TRANSITION] Segment {segment_idx}: {trans_name} "
f"at end of Surah {chapter_ref.surah} (conf={trans_conf:.2f})")
results.append((TRANSITION_TEXT[trans_name], trans_conf, trans_name, None))
word_indices.append(None)
transition_skips += 1
# Enter transition mode already positioned at word 0 of the next surah.
transition_mode = True
detected_surah = next_surah
chapter_ref = get_chapter_reference(next_surah)
pointer = 0
transition_expected_pointer = 0
consecutive_failures = 0
continue
print(f" [CHAPTER-END] Surah {chapter_ref.surah} complete at segment {segment_idx}, "
f"transitioning to Surah {next_surah}")
_dc = get_debug_collector()
if _dc is not None:
_dc.add_event("chapter_transition", at_segment=segment_idx,
from_surah=chapter_ref.surah, to_surah=next_surah)
chapter_ref = get_chapter_reference(next_surah)
pointer = 0
transition_expected_pointer = 0
# Commit the chapter switch only when a valid next surah exists.
if next_surah <= 114:
detected_surah = next_surah
consecutive_failures = 0
if amin_consumed > 0:
# Current segment was Amin (already appended above).
# Queue inter-chapter specials for subsequent segments.
has_basmala = any(s[2] in ("Basmala", "Isti'adha+Basmala") for s in inter_specials)
is_first_after_transition = not has_basmala
if num_consumed > 0:
# All detected specials belong to later segments here, so queue every one.
pending_specials = list(inter_specials)
skip_count = num_consumed
else:
is_first_after_transition = True
continue
if num_consumed > 0:
has_basmala = any(s[2] in ("Basmala", "Isti'adha+Basmala") for s in inter_specials)
is_first_after_transition = not has_basmala
# Current segment is a special — append its result
results.append(inter_specials[0])
word_indices.append(None)
# Queue remaining specials for subsequent segments
if num_consumed > 1:
pending_specials = list(inter_specials[1:])
skip_count = num_consumed - 1
continue
else:
is_first_after_transition = True
# No specials — re-try alignment on this segment against the new chapter
alignment, timing = align_segment(asr_phonemes, chapter_ref, pointer, segment_idx)
num_segments += 1
if PHONEME_ALIGNMENT_PROFILING:
dp_times.append(timing['dp_time'])
window_setup_total += timing['window_setup_time']
result_build_total += timing['result_build_time']
# Fall through to existing if/else below
# Basmala-fused retry: if this is the first segment after a transition
# and Basmala wasn't detected, the reciter may have merged Basmala with
# the first verse. Always try prepending Basmala phonemes to R and pick
# the better result (even if the plain alignment already succeeded).
if is_first_after_transition:
is_first_after_transition = False
basmala_alignment, basmala_timing = align_segment(
asr_phonemes, chapter_ref, pointer, segment_idx,
basmala_prefix=True)
num_segments += 1
if PHONEME_ALIGNMENT_PROFILING:
dp_times.append(basmala_timing['dp_time'])
window_setup_total += basmala_timing['window_setup_time']
result_build_total += basmala_timing['result_build_time']
if basmala_alignment and basmala_alignment.basmala_consumed:
# A failed plain alignment counts as confidence 0.0 in the comparison.
existing_conf = alignment.confidence if alignment else 0.0
if basmala_alignment.confidence > existing_conf:
matched_text = SPECIAL_TEXT["Basmala"] + " " + get_matched_text(chapter_ref, basmala_alignment)
result = (matched_text, basmala_alignment.confidence, basmala_alignment.matched_ref,
basmala_alignment.wrap_word_ranges)
pointer = basmala_alignment.end_word_idx + 1
consecutive_failures = 0
word_indices.append((basmala_alignment.start_word_idx, basmala_alignment.end_word_idx))
_check_transition_gap(basmala_alignment.start_word_idx)
if basmala_alignment.n_wraps > 0:
# len(results) is the index this result takes once appended just below.
repetition_segments.add(len(results))
results.append(result)
special_merges += 1
segments_passed += 1
print(f" [BASMALA-FUSED] Segment {segment_idx}: Basmala merged with verse "
f"(fused conf={basmala_alignment.confidence:.2f} > plain conf={existing_conf:.2f})")
_dc = get_debug_collector()
if _dc is not None:
_dc.add_event("basmala_fused", segment_idx=segment_idx,
fused_conf=round(basmala_alignment.confidence, 4),
plain_conf=round(existing_conf, 4), chose="fused")
continue
# Basmala-fused didn't win — fall through with original alignment
# Plain alignment (primary attempt or chapter-transition re-try) succeeded.
if alignment:
is_first_after_transition = False
matched_text = get_matched_text(chapter_ref, alignment)
result = (matched_text, alignment.confidence, alignment.matched_ref,
alignment.wrap_word_ranges)
pointer = alignment.end_word_idx + 1 # Advance pointer
consecutive_failures = 0
word_indices.append((alignment.start_word_idx, alignment.end_word_idx))
_check_transition_gap(alignment.start_word_idx)
if alignment.n_wraps > 0:
# Index this result will occupy after the results.append at the end of the loop body.
repetition_segments.add(len(results))
segments_passed += 1
else:
# === Check for transition segment before retry tiers ===
trans_name, trans_conf = detect_transition_segment(asr_phonemes)
if trans_name:
print(f" [TRANSITION] Segment {segment_idx}: {trans_name} (conf={trans_conf:.2f})")
_dc = get_debug_collector()
if _dc is not None:
_dc.add_event("transition_detected", segment_idx=segment_idx,
transition_type=trans_name, confidence=round(trans_conf, 4),
context="pre_retry")
result = (TRANSITION_TEXT[trans_name], trans_conf, trans_name, None)
word_indices.append(None)
transition_skips += 1
# Subsequent segments are checked for further transitions before alignment.
transition_mode = True
# Tahmeed peek-ahead for merge
if trans_name == "Tahmeed":
next_abs = first_quran_idx + i + 1
if next_abs < len(phoneme_texts) and phoneme_texts[next_abs]:
resp_name, resp_conf = detect_transition_segment(
phoneme_texts[next_abs], allowed={"Tahmeed"})
if resp_name:
merged_into[next_abs] = first_quran_idx + i
tahmeed_merge_skip = 1
print(f" [TAHMEED-MERGE] Next segment merged into Tahmeed")
if _dc is not None:
_dc.add_event("tahmeed_merge", segment_idx=segment_idx,
merged_segment=next_abs)
results.append(result)
continue
# === Graduated retry ===
# Tier 1: expanded window, same threshold
tier1_attempts += 1
tier1_segments.append(segment_idx)
alignment, timing = align_segment(
asr_phonemes, chapter_ref, pointer, segment_idx,
lookback_override=RETRY_LOOKBACK_WORDS,
lookahead_override=RETRY_LOOKAHEAD_WORDS,
)
num_segments += 1
if PHONEME_ALIGNMENT_PROFILING:
dp_times.append(timing['dp_time'])
window_setup_total += timing['window_setup_time']
result_build_total += timing['result_build_time']
# Tier 2: expanded window + relaxed threshold
tier2_entered = False
if alignment is None:
tier2_entered = True
tier2_attempts += 1
tier2_segments.append(segment_idx)
alignment, timing = align_segment(
asr_phonemes, chapter_ref, pointer, segment_idx,
lookback_override=RETRY_LOOKBACK_WORDS,
lookahead_override=RETRY_LOOKAHEAD_WORDS,
max_edit_distance_override=MAX_EDIT_DISTANCE_RELAXED,
)
num_segments += 1
if PHONEME_ALIGNMENT_PROFILING:
dp_times.append(timing['dp_time'])
window_setup_total += timing['window_setup_time']
result_build_total += timing['result_build_time']
if alignment:
# Retry succeeded
is_first_after_transition = False
matched_text = get_matched_text(chapter_ref, alignment)
result = (matched_text, alignment.confidence, alignment.matched_ref,
alignment.wrap_word_ranges)
pointer = alignment.end_word_idx + 1
consecutive_failures = 0
word_indices.append((alignment.start_word_idx, alignment.end_word_idx))
_check_transition_gap(alignment.start_word_idx)
if alignment.n_wraps > 0:
repetition_segments.add(len(results))
segments_passed += 1
# Credit the tier that produced the successful alignment.
tier_name = "tier2" if tier2_entered else "tier1"
if tier2_entered:
tier2_passed += 1
else:
tier1_passed += 1
print(f" [RETRY-OK] Segment {segment_idx}: recovered via expanded window/relaxed threshold")
_dc = get_debug_collector()
if _dc is not None:
# NOTE(review): pointer was already advanced to end_word_idx + 1 above, so
# pointer - 1 equals alignment.end_word_idx rather than the pointer the
# retry started from — confirm this is the intended value to log.
_dc.add_alignment_result(
segment_idx, asr_phonemes,
window={"pointer": pointer - 1, "surah": detected_surah},
expected_pointer=pointer - 1,
result=_debug_alignment_result(alignment, chapter_ref),
timing=timing, retry_tier=tier_name,
)
_dc.add_event(f"retry_{tier_name}", segment_idx=segment_idx,
passed=True, confidence=round(alignment.confidence, 4))
else:
# Real failure after all retries
result = ("", 0.0, "", None)
consecutive_failures += 1
word_indices.append(None)
_dc = get_debug_collector()
if _dc is not None:
_dc.add_event("retry_failed", segment_idx=segment_idx,
tier1=True, tier2=tier2_entered)
if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
consec_reanchors += 1
# Global re-anchor (not constrained to current surah)
# Voting uses only the segments after the current (failed) one.
remaining_idx = first_quran_idx + i + 1
remaining_texts = phoneme_texts[remaining_idx:]
if remaining_texts:
reanchor_surah, reanchor_ayah = find_anchor_by_voting(
remaining_texts, get_ngram_index(), ANCHOR_SEGMENTS,
)
if reanchor_surah > 0:
if reanchor_surah != detected_surah:
detected_surah = reanchor_surah
chapter_ref = get_chapter_reference(detected_surah)
pointer = verse_to_word_index(chapter_ref, reanchor_ayah)
transition_expected_pointer = pointer
print(f" [GLOBAL-REANCHOR] Jumped to Surah {detected_surah}, "
f"Ayah {reanchor_ayah}, word {pointer}")
_dc = get_debug_collector()
if _dc is not None:
_dc.add_event("reanchor", at_segment=segment_idx,
reason="consecutive_failures",
new_surah=detected_surah,
new_ayah=reanchor_ayah, new_pointer=pointer)
consecutive_failures = 0
# Record the outcome (match, retry match, or failure) for this segment.
results.append(result)
# Post-processing: detect consecutive segments with reference gaps
# (gap_segments may already have entries from chapter-transition checks above)
prev_matched_idx = None
for idx in range(len(results)):
if word_indices[idx] is None:
continue
if prev_matched_idx is not None:
# Skip gap check across chapter transitions — word indices are per-chapter
prev_ref = results[prev_matched_idx][2]
curr_ref = results[idx][2]
# The surah is taken as the text before the first ":" of matched_ref.
prev_surah = prev_ref.split(":")[0] if prev_ref and ":" in prev_ref else None
curr_surah = curr_ref.split(":")[0] if curr_ref and ":" in curr_ref else None
if prev_surah is not None and prev_surah == curr_surah:
prev_end = word_indices[prev_matched_idx][1]
curr_start = word_indices[idx][0]
# Number of reference words strictly between the two matched ranges.
gap = curr_start - prev_end - 1
if gap > 0:
gap_segments.add(prev_matched_idx)
gap_segments.add(idx)
print(f" [GAP] {gap} word(s) missing between segments "
f"{prev_matched_idx + 1} and {idx + 1}")
_dc = get_debug_collector()
if _dc is not None:
_dc.add_event("gap", position="between",
segment_before=prev_matched_idx + 1,
segment_after=idx + 1, missing_words=gap)
prev_matched_idx = idx
# Edge case: missing words at start of expected range
first_matched = next((i for i, w in enumerate(word_indices) if w is not None), None)
if first_matched is not None:
first_start = word_indices[first_matched][0]
if first_start > start_pointer:
gap_segments.add(first_matched)
gap_count = first_start - start_pointer
print(f" [GAP] {gap_count} word(s) missing before first segment {first_matched + 1}")
_dc = get_debug_collector()
if _dc is not None:
_dc.add_event("gap", position="before_first",
segment_idx=first_matched + 1, missing_words=gap_count)
# Edge case: missing words at end of current verse
# Only flag if the last matched segment is also the final segment overall.
# If there are trailing no-match segments after it, those account for the
# remaining audio — the words aren't missing, they just failed to align.
# Compare against the verse boundary (not chapter end), since a recitation
# doesn't necessarily cover the entire chapter.
last_matched = next((i for i in range(len(word_indices) - 1, -1, -1) if word_indices[i] is not None), None)
if last_matched is not None and last_matched == len(word_indices) - 1:
last_end = word_indices[last_matched][1]
# NOTE(review): this indexes whichever chapter_ref is current after the loop;
# presumably that is the chapter the final segment matched in — confirm.
last_ayah = chapter_ref.words[last_end].ayah
# Find the last word index that belongs to the same verse
verse_end = last_end
while verse_end + 1 < chapter_ref.num_words and chapter_ref.words[verse_end + 1].ayah == last_ayah:
verse_end += 1
if last_end < verse_end:
gap_segments.add(last_matched)
gap_count = verse_end - last_end
print(f" [GAP] {gap_count} word(s) missing after last segment {last_matched + 1}")
_dc = get_debug_collector()
if _dc is not None:
_dc.add_event("gap", position="after_last",
segment_idx=last_matched + 1, missing_words=gap_count)
# Build profiling dict
# With profiling on, timing fields are included; the counter fields are the
# same in both variants.
if PHONEME_ALIGNMENT_PROFILING:
total_time = time.perf_counter() - total_start
profiling = {
"total_time": total_time,
"ref_build_time": ref_build_time,
"dp_total_time": sum(dp_times),
"dp_min_time": min(dp_times) if dp_times else 0.0,
"dp_max_time": max(dp_times) if dp_times else 0.0,
"window_setup_time": window_setup_total,
"result_build_time": result_build_total,
"num_segments": num_segments,
"tier1_attempts": tier1_attempts,
"tier1_passed": tier1_passed,
"tier1_segments": tier1_segments,
"tier2_attempts": tier2_attempts,
"tier2_passed": tier2_passed,
"tier2_segments": tier2_segments,
"consec_reanchors": consec_reanchors,
"segments_attempted": segments_attempted,
"segments_passed": segments_passed,
"special_merges": special_merges,
"transition_skips": transition_skips,
"phoneme_wraps_detected": len(repetition_segments),
}
else:
profiling = {
"num_segments": num_segments,
"tier1_attempts": tier1_attempts,
"tier1_passed": tier1_passed,
"tier1_segments": tier1_segments,
"tier2_attempts": tier2_attempts,
"tier2_passed": tier2_passed,
"tier2_segments": tier2_segments,
"consec_reanchors": consec_reanchors,
"segments_attempted": segments_attempted,
"segments_passed": segments_passed,
"special_merges": special_merges,
"transition_skips": transition_skips,
"phoneme_wraps_detected": len(repetition_segments),
}
return results, profiling, gap_segments, merged_into, repetition_segments