hetchyy Claude Opus 4.6 committed on
Commit
69ad06d
·
1 Parent(s): 20adb7f

Extract segment rendering helpers from app.py into src/ui/segments.py

Browse files

Move ~530 lines of pure rendering functions (format_timestamp, render_segment_card,
render_segments, encode_segment_audio, etc.) into a dedicated module to reduce
app.py monolith size. No logic changes — verbatim extraction with imports.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (3) hide show
  1. app.py +6 -534
  2. src/ui/__init__.py +0 -0
  3. src/ui/segments.py +540 -0
app.py CHANGED
@@ -64,6 +64,12 @@ from src.segment_processor import (
64
  )
65
  from config import ANCHOR_SEGMENTS
66
  from data.font_data import DIGITAL_KHATT_FONT_B64, SURAH_NAME_FONT_B64
 
 
 
 
 
 
67
 
68
  # Load surah name ligature map
69
  with open(Path(__file__).parent / "data" / "ligatures.json") as _f:
@@ -142,540 +148,6 @@ def test_aoti_compilation_gpu():
142
  return test_vad_aoti_export()
143
 
144
 
145
- # =============================================================================
146
- # Segment rendering
147
- # =============================================================================
148
-
149
- def format_timestamp(seconds: float) -> str:
150
- """Format seconds as MM:SS.ms"""
151
- minutes = int(seconds // 60)
152
- secs = seconds % 60
153
- return f"{minutes}:{secs:04.1f}"
154
-
155
-
156
- def get_confidence_class(score: float) -> str:
157
- """Get CSS class based on confidence score."""
158
- if score >= CONFIDENCE_HIGH:
159
- return "segment-high"
160
- elif score >= CONFIDENCE_MED:
161
- return "segment-med"
162
- else:
163
- return "segment-low"
164
-
165
-
166
- def get_segment_word_stats(matched_ref: str) -> tuple[int, int]:
167
- """Return (word_count, ayah_span) for a matched ref. (0, 1) if unparseable."""
168
- if not matched_ref or "-" not in matched_ref:
169
- return 0, 1
170
- try:
171
- start_ref, end_ref = matched_ref.split("-", 1)
172
- start_parts = start_ref.split(":")
173
- end_parts = end_ref.split(":")
174
- if len(start_parts) < 3 or len(end_parts) < 3:
175
- return 0, 1
176
-
177
- # Ayah span
178
- start_ayah = (int(start_parts[0]), int(start_parts[1]))
179
- end_ayah = (int(end_parts[0]), int(end_parts[1]))
180
- ayah_span = 1
181
- if start_ayah != end_ayah:
182
- ayah_span = abs(end_ayah[1] - start_ayah[1]) + 1 if start_ayah[0] == end_ayah[0] else 2
183
-
184
- # Word count via index
185
- word_count = 0
186
- from src.quran_index import get_quran_index
187
- index = get_quran_index()
188
- indices = index.ref_to_indices(matched_ref)
189
- if indices:
190
- word_count = indices[1] - indices[0] + 1
191
-
192
- return word_count, ayah_span
193
- except Exception:
194
- return 0, 1
195
-
196
-
197
- def check_undersegmented(matched_ref: str, duration: float) -> bool:
198
- """Check if a segment is potentially undersegmented.
199
-
200
- Criteria: (word_count >= threshold OR ayah_span >= threshold) AND duration >= threshold.
201
- """
202
- if duration < UNDERSEG_MIN_DURATION:
203
- return False
204
- word_count, ayah_span = get_segment_word_stats(matched_ref)
205
- return word_count >= UNDERSEG_MIN_WORDS or ayah_span >= UNDERSEG_MIN_AYAH_SPAN
206
-
207
-
208
- # Arabic-Indic digits for verse markers
209
- ARABIC_DIGITS = {
210
- '0': '٠', '1': '١', '2': '٢', '3': '٣', '4': '٤',
211
- '5': '٥', '6': '٦', '7': '٧', '8': '٨', '9': '٩',
212
- }
213
-
214
- def to_arabic_numeral(number: int) -> str:
215
- """Convert an integer to Arabic-Indic numerals."""
216
- return ''.join(ARABIC_DIGITS[d] for d in str(number))
217
-
218
-
219
- def format_verse_marker(verse_num: int) -> str:
220
- """
221
- Format a verse number as an Arabic verse marker.
222
- Uses U+06DD (Arabic End of Ayah) which renders as a decorated marker
223
- in DigitalKhatt (combines U+06DD + digit into a single glyph).
224
- """
225
- numeral = to_arabic_numeral(verse_num)
226
- end_of_ayah = '\u06DD'
227
- return f'{end_of_ayah}{numeral}'
228
-
229
-
230
- # Cached verse word counts from surah_info.json
231
- _verse_word_counts_cache: dict[int, dict[int, int]] | None = None
232
-
233
-
234
- def _load_verse_word_counts() -> dict[int, dict[int, int]]:
235
- """Load and cache verse word counts from surah_info.json."""
236
- global _verse_word_counts_cache
237
- if _verse_word_counts_cache is not None:
238
- return _verse_word_counts_cache
239
-
240
- with open(SURAH_INFO_PATH, 'r', encoding='utf-8') as f:
241
- surah_info = json.load(f)
242
-
243
- _verse_word_counts_cache = {}
244
- for surah_num, data in surah_info.items():
245
- surah_int = int(surah_num)
246
- _verse_word_counts_cache[surah_int] = {}
247
- for verse_data in data.get('verses', []):
248
- verse_num = verse_data.get('verse')
249
- num_words = verse_data.get('num_words', 0)
250
- if verse_num:
251
- _verse_word_counts_cache[surah_int][verse_num] = num_words
252
-
253
- return _verse_word_counts_cache
254
-
255
-
256
- def split_into_char_groups(text):
257
- """Split text into groups of base character + following combining marks.
258
-
259
- Each group is one visible "letter" — a base character followed by any
260
- diacritics (tashkeel) or other combining marks attached to it.
261
- """
262
- groups = []
263
- current = ""
264
- for ch in text:
265
- if unicodedata.category(ch).startswith('M') and ch != '\u0670':
266
- current += ch
267
- else:
268
- if current:
269
- groups.append(current)
270
- current = ch
271
- if current:
272
- groups.append(current)
273
- return groups
274
-
275
-
276
- ZWSP = '\u200b'
277
- DAGGER_ALEF = '\u0670'
278
-
279
- def _wrap_word_with_chars(word_text, pos=None):
280
- """Wrap a word in <span class="word"> with nested <span class="char"> per letter group."""
281
- # Strip tatweel (U+0640) — MFA doesn't output it, so keeping it causes
282
- # index misalignment during timestamp injection
283
- word_text = word_text.replace('\u0640', '')
284
- # Insert ZWSP before dagger alef so it can be highlighted independently
285
- spans = []
286
- for g in split_into_char_groups(word_text):
287
- if g.startswith(DAGGER_ALEF):
288
- spans.append(f'<span class="char">{ZWSP}{g}</span>')
289
- else:
290
- spans.append(f'<span class="char">{g}</span>')
291
- char_spans = "".join(spans)
292
- pos_attr = f' data-pos="{pos}"' if pos else ''
293
- return f'<span class="word"{pos_attr}>{char_spans}</span>'
294
-
295
-
296
- def get_text_with_markers(matched_ref: str) -> str | None:
297
- """
298
- Generate matched text with verse markers inserted at verse boundaries.
299
-
300
- Uses position-based detection: iterates words and inserts an HTML marker
301
- after the last word of each verse (matching recitation_app approach).
302
-
303
- Args:
304
- matched_ref: Reference like "2:255:1-2:255:5"
305
-
306
- Returns:
307
- Text with verse markers, or None if ref is invalid
308
- """
309
- if not matched_ref:
310
- return None
311
-
312
- from src.quran_index import get_quran_index
313
- index = get_quran_index()
314
-
315
- indices = index.ref_to_indices(matched_ref)
316
- if not indices:
317
- return None
318
-
319
- start_idx, end_idx = indices
320
- verse_word_counts = _load_verse_word_counts()
321
-
322
- parts = []
323
- for w in index.words[start_idx:end_idx + 1]:
324
- parts.append(_wrap_word_with_chars(w.display_text, pos=f"{w.surah}:{w.ayah}:{w.word}"))
325
- # Check if this is the last word of its verse
326
- num_words = verse_word_counts.get(w.surah, {}).get(w.ayah, 0)
327
- if num_words > 0 and w.word == num_words:
328
- parts.append(format_verse_marker(w.ayah))
329
-
330
- return " ".join(parts)
331
-
332
-
333
- def simplify_ref(ref: str) -> str:
334
- """Simplify a matched_ref like '84:9:1-84:9:4' to '84:9:1-4' when same verse."""
335
- if not ref or "-" not in ref:
336
- return ref
337
- parts = ref.split("-")
338
- if len(parts) != 2:
339
- return ref
340
- start, end = parts
341
- start_parts = start.split(":")
342
- end_parts = end.split(":")
343
- if len(start_parts) == 3 and len(end_parts) == 3:
344
- if start_parts[0] == end_parts[0] and start_parts[1] == end_parts[1]:
345
- return f"{start}-{end_parts[2]}"
346
- return ref
347
-
348
-
349
- def render_segment_card(seg: SegmentInfo, idx: int, audio_int16: np.ndarray = None, sample_rate: int = 0, render_key: str = "", segment_dir: Path = None, audio_preload: str = "metadata", audio_inline: bool = False) -> str:
350
- """Render a single segment as an HTML card with optional audio player.
351
-
352
- Args:
353
- seg: Segment info
354
- idx: Segment index
355
- audio_int16: Full audio as int16 array for writing per-segment WAV files
356
- sample_rate: Audio sample rate in Hz
357
- render_key: Unique key to prevent browser caching between renders
358
- segment_dir: Directory to write segment WAV files into
359
- """
360
- confidence_class = get_confidence_class(seg.match_score)
361
- confidence_badge_class = confidence_class # preserve original for badge color
362
- if seg.has_missing_words:
363
- confidence_class = "segment-low"
364
- if seg.potentially_undersegmented and confidence_class != "segment-low":
365
- confidence_class = "segment-underseg"
366
-
367
- timestamp = f"{format_timestamp(seg.start_time)} - {format_timestamp(seg.end_time)}"
368
- duration = seg.end_time - seg.start_time
369
-
370
- # Format reference (simplify same-verse refs)
371
- ref_display = simplify_ref(seg.matched_ref) if seg.matched_ref else ""
372
-
373
- # Confidence percentage with label
374
- confidence_pct = f"Confidence: {seg.match_score:.0%}"
375
-
376
- # Undersegmented badge
377
- underseg_badge = ""
378
- if seg.potentially_undersegmented:
379
- underseg_badge = '<div class="segment-badge segment-underseg-badge">Potentially Undersegmented</div>'
380
-
381
- # Missing words badge
382
- missing_badge = ""
383
- if seg.has_missing_words:
384
- missing_badge = '<div class="segment-badge segment-low-badge">Missing Words</div>'
385
-
386
- # Error display
387
- error_html = ""
388
- if seg.error:
389
- error_html = f'<div class="segment-error">{seg.error}</div>'
390
-
391
- # Audio player HTML — each segment gets its own WAV file served by Gradio.
392
- audio_html = ""
393
- if audio_int16 is not None and sample_rate > 0 and segment_dir is not None:
394
- audio_src = encode_segment_audio(audio_int16, sample_rate, seg.start_time, seg.end_time, segment_dir, idx, inline=audio_inline)
395
- # Add animate button only if segment has matched_ref (Quran text with word spans)
396
- animate_btn = ""
397
- if seg.matched_ref:
398
- animate_btn = f'<button class="animate-btn" data-segment="{idx}" disabled>Animate</button>'
399
- audio_html = f'''
400
- <div class="segment-audio">
401
- <audio data-src="{audio_src}" preload="none"
402
- style="display:none; width: 100%; height: 32px;">
403
- </audio>
404
- <button class="play-btn">&#9654;</button>
405
- {animate_btn}
406
- </div>
407
- '''
408
-
409
- # Build matched text with verse markers at all verse boundaries
410
- BASMALA_TEXT = "بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيم"
411
- ISTIATHA_TEXT = "أَعُوذُ بِٱللَّهِ مِنَ الشَّيْطَانِ الرَّجِيم"
412
- COMBINED_PREFIX = ISTIATHA_TEXT + " ۝ " + BASMALA_TEXT
413
- _SPECIAL_PREFIXES = [COMBINED_PREFIX, ISTIATHA_TEXT, BASMALA_TEXT]
414
-
415
- # Helper to wrap words in spans
416
- def wrap_words_in_spans(text):
417
- return " ".join(_wrap_word_with_chars(w) for w in text.split())
418
-
419
- if seg.matched_ref:
420
- # Generate text with markers from the index
421
- text_html = get_text_with_markers(seg.matched_ref)
422
- if text_html and seg.matched_text:
423
- # Check for any special prefix (fused or forward-merged)
424
- for _sp_name, _sp in [("Isti'adha+Basmala", COMBINED_PREFIX),
425
- ("Isti'adha", ISTIATHA_TEXT),
426
- ("Basmala", BASMALA_TEXT)]:
427
- if seg.matched_text.startswith(_sp):
428
- mfa_prefix = f"{_sp_name}+{seg.matched_ref}"
429
- words = _sp.replace(" ۝ ", " ").split()
430
- prefix_html = " ".join(
431
- _wrap_word_with_chars(w, pos=f"{mfa_prefix}:0:0:{i+1}")
432
- for i, w in enumerate(words)
433
- )
434
- text_html = prefix_html + " " + text_html
435
- break
436
- elif not text_html:
437
- # Special ref (Basmala/Isti'adha): wrap words with indexed data-pos
438
- # so MFA timestamps can be injected later
439
- if seg.matched_ref and seg.matched_text:
440
- words = seg.matched_text.split()
441
- text_html = " ".join(
442
- _wrap_word_with_chars(w, pos=f"{seg.matched_ref}:0:0:{i+1}")
443
- for i, w in enumerate(words)
444
- )
445
- else:
446
- text_html = seg.matched_text or ""
447
- elif seg.matched_text:
448
- # Special segments (Basmala/Isti'adha) have text but no ref
449
- text_html = wrap_words_in_spans(seg.matched_text)
450
- else:
451
- text_html = ""
452
-
453
- confidence_badge = "" if seg.has_missing_words else f'<div class="segment-badge {confidence_badge_class}-badge">{confidence_pct}</div>'
454
-
455
- # Build inline header: Segment N | ref | duration | time range
456
- header_parts = [f"Segment {idx + 1}"]
457
- if ref_display:
458
- header_parts.append(ref_display)
459
- header_parts.append(f"{duration:.1f}s")
460
- header_parts.append(timestamp)
461
- header_text = " | ".join(header_parts)
462
-
463
- html = f'''
464
- <div class="segment-card {confidence_class}" data-duration="{duration:.3f}" data-segment-idx="{idx}" data-matched-ref="{seg.matched_ref or ''}" data-start-time="{seg.start_time:.4f}" data-end-time="{seg.end_time:.4f}">
465
- <div class="segment-header">
466
- <div class="segment-title">{header_text}</div>
467
- <div class="segment-badges">
468
- {underseg_badge}
469
- {confidence_badge}
470
- {missing_badge}
471
- </div>
472
- </div>
473
-
474
- {audio_html}
475
-
476
- <div class="segment-text">
477
- {text_html}
478
- </div>
479
-
480
- {error_html}
481
- </div>
482
- '''
483
- return html
484
-
485
-
486
- def render_segments(segments: list, audio_int16: np.ndarray = None, sample_rate: int = 0, segment_dir: Path = None) -> str:
487
- """Render all segments as HTML with optional audio players.
488
-
489
- Args:
490
- segments: List of SegmentInfo objects
491
- audio_int16: Full audio as int16 array for writing per-segment WAV files
492
- sample_rate: Audio sample rate in Hz
493
- segment_dir: Directory containing per-segment WAV files
494
- """
495
- import time
496
- import wave
497
-
498
- if not segments:
499
- return '<div class="no-segments">No segments detected</div>'
500
-
501
- # Generate unique key for this render to prevent audio caching
502
- render_key = str(int(time.time() * 1000))
503
-
504
- # Write full audio file for unified megacard playback
505
- full_audio_url = ""
506
- if audio_int16 is not None and sample_rate > 0 and segment_dir:
507
- full_path = segment_dir / "full.wav"
508
- with wave.open(str(full_path), 'wb') as wf:
509
- wf.setnchannels(1)
510
- wf.setsampwidth(2)
511
- wf.setframerate(sample_rate)
512
- wf.writeframes(audio_int16.tobytes())
513
- full_audio_url = f"/gradio_api/file={full_path}"
514
-
515
- # Categorize segments by confidence level (1-indexed for display)
516
- med_segments = [i + 1 for i, s in enumerate(segments) if CONFIDENCE_MED <= s.match_score < CONFIDENCE_HIGH]
517
- low_segments = [i + 1 for i, s in enumerate(segments) if s.match_score < CONFIDENCE_MED]
518
-
519
- # Build header with confidence summary
520
- header_parts = []
521
-
522
- header_parts.append(f'<div class="segments-header">Found {len(segments)} segments</div>')
523
-
524
- # Combined review summary: merge medium and low confidence segments into one color-coded list
525
- low_set = set(low_segments)
526
- all_review = sorted(set(med_segments) | low_set)
527
- if all_review:
528
- def _span(n: int) -> str:
529
- css = "segment-low-text" if n in low_set else "segment-med-text"
530
- return f'<span class="{css}">{n}</span>'
531
-
532
- if len(all_review) <= REVIEW_SUMMARY_MAX_SEGMENTS:
533
- seg_html = ", ".join(_span(n) for n in all_review)
534
- else:
535
- seg_html = ", ".join(_span(n) for n in all_review[:REVIEW_SUMMARY_MAX_SEGMENTS])
536
- remaining = len(all_review) - REVIEW_SUMMARY_MAX_SEGMENTS
537
- seg_html += f" ... and {remaining} more"
538
-
539
- header_parts.append(
540
- f'<div class="segments-review-summary">'
541
- f'Needs review: {len(all_review)} (segments {seg_html})'
542
- f'</div>'
543
- )
544
-
545
- missing_segments = [i + 1 for i, s in enumerate(segments) if s.has_missing_words]
546
- if missing_segments:
547
- # Group consecutive segment numbers into pairs (gaps always flag both neighbors)
548
- missing_pairs = []
549
- i = 0
550
- while i < len(missing_segments):
551
- if i + 1 < len(missing_segments) and missing_segments[i + 1] == missing_segments[i] + 1:
552
- missing_pairs.append(f"{missing_segments[i]}/{missing_segments[i + 1]}")
553
- i += 2
554
- else:
555
- missing_pairs.append(str(missing_segments[i]))
556
- i += 1
557
-
558
- if len(missing_pairs) <= REVIEW_SUMMARY_MAX_SEGMENTS:
559
- pairs_display = ", ".join(missing_pairs)
560
- else:
561
- pairs_display = ", ".join(missing_pairs[:REVIEW_SUMMARY_MAX_SEGMENTS])
562
- remaining = len(missing_pairs) - REVIEW_SUMMARY_MAX_SEGMENTS
563
- pairs_display += f" ... and {remaining} more"
564
-
565
- header_parts.append(
566
- f'<div class="segments-review-summary">'
567
- f'Segments with missing words: <span class="segment-low-text">{len(missing_pairs)} (segments {pairs_display})</span>'
568
- f'</div>'
569
- )
570
-
571
- underseg_segments = [i + 1 for i, s in enumerate(segments) if s.potentially_undersegmented]
572
- if underseg_segments:
573
- if len(underseg_segments) <= REVIEW_SUMMARY_MAX_SEGMENTS:
574
- underseg_display = ", ".join(str(n) for n in underseg_segments)
575
- else:
576
- underseg_display = ", ".join(str(n) for n in underseg_segments[:REVIEW_SUMMARY_MAX_SEGMENTS])
577
- remaining = len(underseg_segments) - REVIEW_SUMMARY_MAX_SEGMENTS
578
- underseg_display += f" ... and {remaining} more"
579
-
580
- header_parts.append(
581
- f'<div class="segments-review-summary">'
582
- f'Potentially undersegmented: <span class="segment-underseg-text">{len(underseg_segments)} (segments {underseg_display})</span>'
583
- f'</div>'
584
- )
585
-
586
- html_parts = [
587
- f'<div class="segments-container" data-render-key="{render_key}" data-full-audio="{full_audio_url}">',
588
- "\n".join(header_parts),
589
- ]
590
-
591
- for idx, seg in enumerate(segments):
592
- inline = idx < AUDIO_PRELOAD_COUNT
593
- preload = "auto" if inline else "metadata"
594
- html_parts.append(render_segment_card(seg, idx, audio_int16, sample_rate, render_key, segment_dir, audio_preload=preload, audio_inline=inline))
595
-
596
- html_parts.append('</div>')
597
-
598
- return "\n".join(html_parts)
599
-
600
-
601
- # =============================================================================
602
- # Main processing
603
- # =============================================================================
604
-
605
- def encode_segment_audio(
606
- audio_int16: np.ndarray, sample_rate: int,
607
- start_time: float, end_time: float,
608
- segment_dir: Path, segment_idx: int,
609
- inline: bool = False,
610
- ) -> str:
611
- """Write a segment's audio slice as a WAV file and return a src URL.
612
-
613
- Args:
614
- audio_int16: Full audio already converted to int16 (avoids per-segment conversion).
615
- sample_rate: Sample rate in Hz.
616
- start_time: Segment start in seconds.
617
- end_time: Segment end in seconds.
618
- segment_dir: Directory to write the WAV file into.
619
- segment_idx: Segment index (used for filename).
620
- inline: If True, return a base64 data URI instead of a file URL.
621
-
622
- Returns a ``data:`` URI (inline) or ``/gradio_api/file=`` URL.
623
- """
624
- import wave
625
- import io
626
-
627
- start_sample = int(start_time * sample_rate)
628
- end_sample = int(end_time * sample_rate)
629
- segment_audio = audio_int16[start_sample:end_sample]
630
-
631
- # Always write WAV to disk (needed by MFA timestamp computation)
632
- path = segment_dir / f"seg_{segment_idx}.wav"
633
- with wave.open(str(path), 'wb') as wf:
634
- wf.setnchannels(1)
635
- wf.setsampwidth(2)
636
- wf.setframerate(sample_rate)
637
- wf.writeframes(segment_audio.tobytes())
638
-
639
- if inline:
640
- import base64
641
- with open(path, 'rb') as f:
642
- b64 = base64.b64encode(f.read()).decode('ascii')
643
- return f"data:audio/wav;base64,{b64}"
644
-
645
- return f"/gradio_api/file={path}"
646
-
647
-
648
- def is_end_of_verse(matched_ref: str) -> bool:
649
- """
650
- Check if a reference ends at the last word of a verse.
651
- Expects formats like "2:255:1-2:255:5" or "2:255:5".
652
- """
653
- if not matched_ref or ":" not in matched_ref:
654
- return False
655
-
656
- try:
657
- # Take the end part of the range (or the single ref)
658
- end_ref = matched_ref.split("-")[-1]
659
- parts = end_ref.split(":")
660
- if len(parts) < 3:
661
- return False
662
-
663
- surah = int(parts[0])
664
- ayah = int(parts[1])
665
- word = int(parts[2])
666
-
667
- verse_word_counts = _load_verse_word_counts()
668
- if surah not in verse_word_counts:
669
- return False
670
-
671
- num_words = verse_word_counts[surah].get(ayah, 0)
672
- return word >= num_words
673
- except Exception as e:
674
- print(f"Error checking end of verse: {e}")
675
-
676
- return False
677
-
678
-
679
  def _run_post_vad_pipeline(
680
  audio, sample_rate, intervals,
681
  model_name, device, profiling, pipeline_start, progress_steps,
 
64
  )
65
  from config import ANCHOR_SEGMENTS
66
  from data.font_data import DIGITAL_KHATT_FONT_B64, SURAH_NAME_FONT_B64
67
+ from src.ui.segments import (
68
+ format_timestamp, get_confidence_class, get_segment_word_stats,
69
+ check_undersegmented, to_arabic_numeral, format_verse_marker,
70
+ split_into_char_groups, get_text_with_markers, simplify_ref,
71
+ render_segment_card, render_segments, encode_segment_audio, is_end_of_verse,
72
+ )
73
 
74
  # Load surah name ligature map
75
  with open(Path(__file__).parent / "data" / "ligatures.json") as _f:
 
148
  return test_vad_aoti_export()
149
 
150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  def _run_post_vad_pipeline(
152
  audio, sample_rate, intervals,
153
  model_name, device, profiling, pipeline_start, progress_steps,
src/ui/__init__.py ADDED
File without changes
src/ui/segments.py ADDED
@@ -0,0 +1,540 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Segment rendering and text formatting helpers.
2
+
3
+ Extracted from app.py — pure functions with no upward dependencies.
4
+ """
5
+ import json
6
+ import time
7
+ import wave
8
+ import io
9
+ import base64
10
+ import unicodedata
11
+ from pathlib import Path
12
+
13
+ import numpy as np
14
+
15
+ from config import (
16
+ CONFIDENCE_HIGH, CONFIDENCE_MED,
17
+ UNDERSEG_MIN_WORDS, UNDERSEG_MIN_AYAH_SPAN, UNDERSEG_MIN_DURATION,
18
+ REVIEW_SUMMARY_MAX_SEGMENTS, AUDIO_PRELOAD_COUNT,
19
+ SURAH_INFO_PATH,
20
+ )
21
+ from src.segment_types import SegmentInfo
22
+
23
+
24
+ def format_timestamp(seconds: float) -> str:
25
+ """Format seconds as MM:SS.ms"""
26
+ minutes = int(seconds // 60)
27
+ secs = seconds % 60
28
+ return f"{minutes}:{secs:04.1f}"
29
+
30
+
31
+ def get_confidence_class(score: float) -> str:
32
+ """Get CSS class based on confidence score."""
33
+ if score >= CONFIDENCE_HIGH:
34
+ return "segment-high"
35
+ elif score >= CONFIDENCE_MED:
36
+ return "segment-med"
37
+ else:
38
+ return "segment-low"
39
+
40
+
41
+ def get_segment_word_stats(matched_ref: str) -> tuple[int, int]:
42
+ """Return (word_count, ayah_span) for a matched ref. (0, 1) if unparseable."""
43
+ if not matched_ref or "-" not in matched_ref:
44
+ return 0, 1
45
+ try:
46
+ start_ref, end_ref = matched_ref.split("-", 1)
47
+ start_parts = start_ref.split(":")
48
+ end_parts = end_ref.split(":")
49
+ if len(start_parts) < 3 or len(end_parts) < 3:
50
+ return 0, 1
51
+
52
+ # Ayah span
53
+ start_ayah = (int(start_parts[0]), int(start_parts[1]))
54
+ end_ayah = (int(end_parts[0]), int(end_parts[1]))
55
+ ayah_span = 1
56
+ if start_ayah != end_ayah:
57
+ ayah_span = abs(end_ayah[1] - start_ayah[1]) + 1 if start_ayah[0] == end_ayah[0] else 2
58
+
59
+ # Word count via index
60
+ word_count = 0
61
+ from src.quran_index import get_quran_index
62
+ index = get_quran_index()
63
+ indices = index.ref_to_indices(matched_ref)
64
+ if indices:
65
+ word_count = indices[1] - indices[0] + 1
66
+
67
+ return word_count, ayah_span
68
+ except Exception:
69
+ return 0, 1
70
+
71
+
72
+ def check_undersegmented(matched_ref: str, duration: float) -> bool:
73
+ """Check if a segment is potentially undersegmented.
74
+
75
+ Criteria: (word_count >= threshold OR ayah_span >= threshold) AND duration >= threshold.
76
+ """
77
+ if duration < UNDERSEG_MIN_DURATION:
78
+ return False
79
+ word_count, ayah_span = get_segment_word_stats(matched_ref)
80
+ return word_count >= UNDERSEG_MIN_WORDS or ayah_span >= UNDERSEG_MIN_AYAH_SPAN
81
+
82
+
83
+ # Arabic-Indic digits for verse markers
84
+ ARABIC_DIGITS = {
85
+ '0': '٠', '1': '١', '2': '٢', '3': '٣', '4': '٤',
86
+ '5': '٥', '6': '٦', '7': '٧', '8': '٨', '9': '٩',
87
+ }
88
+
89
+ def to_arabic_numeral(number: int) -> str:
90
+ """Convert an integer to Arabic-Indic numerals."""
91
+ return ''.join(ARABIC_DIGITS[d] for d in str(number))
92
+
93
+
94
+ def format_verse_marker(verse_num: int) -> str:
95
+ """
96
+ Format a verse number as an Arabic verse marker.
97
+ Uses U+06DD (Arabic End of Ayah) which renders as a decorated marker
98
+ in DigitalKhatt (combines U+06DD + digit into a single glyph).
99
+ """
100
+ numeral = to_arabic_numeral(verse_num)
101
+ end_of_ayah = '\u06DD'
102
+ return f'{end_of_ayah}{numeral}'
103
+
104
+
105
+ # Cached verse word counts from surah_info.json
106
+ _verse_word_counts_cache: dict[int, dict[int, int]] | None = None
107
+
108
+
109
+ def _load_verse_word_counts() -> dict[int, dict[int, int]]:
110
+ """Load and cache verse word counts from surah_info.json."""
111
+ global _verse_word_counts_cache
112
+ if _verse_word_counts_cache is not None:
113
+ return _verse_word_counts_cache
114
+
115
+ with open(SURAH_INFO_PATH, 'r', encoding='utf-8') as f:
116
+ surah_info = json.load(f)
117
+
118
+ _verse_word_counts_cache = {}
119
+ for surah_num, data in surah_info.items():
120
+ surah_int = int(surah_num)
121
+ _verse_word_counts_cache[surah_int] = {}
122
+ for verse_data in data.get('verses', []):
123
+ verse_num = verse_data.get('verse')
124
+ num_words = verse_data.get('num_words', 0)
125
+ if verse_num:
126
+ _verse_word_counts_cache[surah_int][verse_num] = num_words
127
+
128
+ return _verse_word_counts_cache
129
+
130
+
131
+ def split_into_char_groups(text):
132
+ """Split text into groups of base character + following combining marks.
133
+
134
+ Each group is one visible "letter" — a base character followed by any
135
+ diacritics (tashkeel) or other combining marks attached to it.
136
+ """
137
+ groups = []
138
+ current = ""
139
+ for ch in text:
140
+ if unicodedata.category(ch).startswith('M') and ch != '\u0670':
141
+ current += ch
142
+ else:
143
+ if current:
144
+ groups.append(current)
145
+ current = ch
146
+ if current:
147
+ groups.append(current)
148
+ return groups
149
+
150
+
151
+ ZWSP = '\u200b'
152
+ DAGGER_ALEF = '\u0670'
153
+
154
+ def _wrap_word_with_chars(word_text, pos=None):
155
+ """Wrap a word in <span class="word"> with nested <span class="char"> per letter group."""
156
+ # Strip tatweel (U+0640) — MFA doesn't output it, so keeping it causes
157
+ # index misalignment during timestamp injection
158
+ word_text = word_text.replace('\u0640', '')
159
+ # Insert ZWSP before dagger alef so it can be highlighted independently
160
+ spans = []
161
+ for g in split_into_char_groups(word_text):
162
+ if g.startswith(DAGGER_ALEF):
163
+ spans.append(f'<span class="char">{ZWSP}{g}</span>')
164
+ else:
165
+ spans.append(f'<span class="char">{g}</span>')
166
+ char_spans = "".join(spans)
167
+ pos_attr = f' data-pos="{pos}"' if pos else ''
168
+ return f'<span class="word"{pos_attr}>{char_spans}</span>'
169
+
170
+
171
+ def get_text_with_markers(matched_ref: str) -> str | None:
172
+ """
173
+ Generate matched text with verse markers inserted at verse boundaries.
174
+
175
+ Uses position-based detection: iterates words and inserts an HTML marker
176
+ after the last word of each verse (matching recitation_app approach).
177
+
178
+ Args:
179
+ matched_ref: Reference like "2:255:1-2:255:5"
180
+
181
+ Returns:
182
+ Text with verse markers, or None if ref is invalid
183
+ """
184
+ if not matched_ref:
185
+ return None
186
+
187
+ from src.quran_index import get_quran_index
188
+ index = get_quran_index()
189
+
190
+ indices = index.ref_to_indices(matched_ref)
191
+ if not indices:
192
+ return None
193
+
194
+ start_idx, end_idx = indices
195
+ verse_word_counts = _load_verse_word_counts()
196
+
197
+ parts = []
198
+ for w in index.words[start_idx:end_idx + 1]:
199
+ parts.append(_wrap_word_with_chars(w.display_text, pos=f"{w.surah}:{w.ayah}:{w.word}"))
200
+ # Check if this is the last word of its verse
201
+ num_words = verse_word_counts.get(w.surah, {}).get(w.ayah, 0)
202
+ if num_words > 0 and w.word == num_words:
203
+ parts.append(format_verse_marker(w.ayah))
204
+
205
+ return " ".join(parts)
206
+
207
+
208
+ def simplify_ref(ref: str) -> str:
209
+ """Simplify a matched_ref like '84:9:1-84:9:4' to '84:9:1-4' when same verse."""
210
+ if not ref or "-" not in ref:
211
+ return ref
212
+ parts = ref.split("-")
213
+ if len(parts) != 2:
214
+ return ref
215
+ start, end = parts
216
+ start_parts = start.split(":")
217
+ end_parts = end.split(":")
218
+ if len(start_parts) == 3 and len(end_parts) == 3:
219
+ if start_parts[0] == end_parts[0] and start_parts[1] == end_parts[1]:
220
+ return f"{start}-{end_parts[2]}"
221
+ return ref
222
+
223
+
224
def render_segment_card(seg: SegmentInfo, idx: int, audio_int16: np.ndarray = None, sample_rate: int = 0, render_key: str = "", segment_dir: Path = None, audio_preload: str = "metadata", audio_inline: bool = False) -> str:
    """Render a single segment as an HTML card with optional audio player.

    Args:
        seg: Segment info. The fields read here are ``match_score``,
            ``has_missing_words``, ``potentially_undersegmented``,
            ``start_time``, ``end_time``, ``matched_ref``, ``matched_text``
            and ``error``.
        idx: Zero-based segment index (shown 1-based in the card title).
        audio_int16: Full audio as int16 array for writing per-segment WAV files
        sample_rate: Audio sample rate in Hz
        render_key: Unique key to prevent browser caching between renders.
            Accepted for interface compatibility; this function does not
            reference it.
        segment_dir: Directory to write segment WAV files into
        audio_preload: Accepted for interface compatibility; the emitted
            ``<audio>`` element always carries ``preload="none"``.
        audio_inline: Forwarded to ``encode_segment_audio`` as ``inline``.

    Returns:
        HTML markup for the card. The audio block is only present when
        ``audio_int16``, a positive ``sample_rate`` and ``segment_dir`` are
        all supplied.
    """
    # Imported locally (and aliased) because a local named ``html`` is used
    # below for the final markup.
    from html import escape as escape_html

    confidence_class = get_confidence_class(seg.match_score)
    confidence_badge_class = confidence_class  # preserve original for badge color
    if seg.has_missing_words:
        confidence_class = "segment-low"
    if seg.potentially_undersegmented and confidence_class != "segment-low":
        confidence_class = "segment-underseg"

    timestamp = f"{format_timestamp(seg.start_time)} - {format_timestamp(seg.end_time)}"
    duration = seg.end_time - seg.start_time

    # Format reference (simplify same-verse refs)
    ref_display = simplify_ref(seg.matched_ref) if seg.matched_ref else ""

    # Confidence percentage with label
    confidence_pct = f"Confidence: {seg.match_score:.0%}"

    # Undersegmented badge
    underseg_badge = ""
    if seg.potentially_undersegmented:
        underseg_badge = '<div class="segment-badge segment-underseg-badge">Potentially Undersegmented</div>'

    # Missing words badge
    missing_badge = ""
    if seg.has_missing_words:
        missing_badge = '<div class="segment-badge segment-low-badge">Missing Words</div>'

    # Error display. The message is free text, so it is escaped to keep any
    # '<', '>' or '&' in it from being interpreted as markup.
    error_html = ""
    if seg.error:
        error_html = f'<div class="segment-error">{escape_html(str(seg.error))}</div>'

    # Audio player HTML — each segment gets its own WAV file served by Gradio.
    audio_html = ""
    if audio_int16 is not None and sample_rate > 0 and segment_dir is not None:
        audio_src = encode_segment_audio(audio_int16, sample_rate, seg.start_time, seg.end_time, segment_dir, idx, inline=audio_inline)
        # Add animate button only if segment has matched_ref (Quran text with word spans)
        animate_btn = ""
        if seg.matched_ref:
            animate_btn = f'<button class="animate-btn" data-segment="{idx}" disabled>Animate</button>'
        # The source goes into ``data-src`` rather than ``src`` — presumably
        # so client-side script can attach it lazily (confirm against the JS).
        audio_html = f'''
        <div class="segment-audio">
            <audio data-src="{audio_src}" preload="none"
                   style="display:none; width: 100%; height: 32px;">
            </audio>
            <button class="play-btn">&#9654;</button>
            {animate_btn}
        </div>
        '''

    # Build matched text with verse markers at all verse boundaries
    BASMALA_TEXT = "بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيم"
    ISTIATHA_TEXT = "أَعُوذُ بِٱللَّهِ مِنَ الشَّيْطَانِ الرَّجِيم"
    COMBINED_PREFIX = ISTIATHA_TEXT + " ۝ " + BASMALA_TEXT

    # Helper to wrap words in spans
    def wrap_words_in_spans(text):
        return " ".join(_wrap_word_with_chars(w) for w in text.split())

    if seg.matched_ref:
        # Generate text with markers from the index
        text_html = get_text_with_markers(seg.matched_ref)
        if text_html and seg.matched_text:
            # Check for any special prefix (fused or forward-merged).
            # The combined prefix is tested first because it itself starts
            # with the Isti'adha text.
            for _sp_name, _sp in [("Isti'adha+Basmala", COMBINED_PREFIX),
                                  ("Isti'adha", ISTIATHA_TEXT),
                                  ("Basmala", BASMALA_TEXT)]:
                if seg.matched_text.startswith(_sp):
                    mfa_prefix = f"{_sp_name}+{seg.matched_ref}"
                    words = _sp.replace(" ۝ ", " ").split()
                    prefix_html = " ".join(
                        _wrap_word_with_chars(w, pos=f"{mfa_prefix}:0:0:{i+1}")
                        for i, w in enumerate(words)
                    )
                    text_html = prefix_html + " " + text_html
                    break
        elif not text_html:
            # Special ref (Basmala/Isti'adha): wrap words with indexed data-pos
            # so MFA timestamps can be injected later
            if seg.matched_text:
                words = seg.matched_text.split()
                text_html = " ".join(
                    _wrap_word_with_chars(w, pos=f"{seg.matched_ref}:0:0:{i+1}")
                    for i, w in enumerate(words)
                )
            else:
                text_html = ""
    elif seg.matched_text:
        # Special segments (Basmala/Isti'adha) have text but no ref
        text_html = wrap_words_in_spans(seg.matched_text)
    else:
        text_html = ""

    # The confidence badge is suppressed when the "Missing Words" badge is shown.
    confidence_badge = "" if seg.has_missing_words else f'<div class="segment-badge {confidence_badge_class}-badge">{confidence_pct}</div>'

    # Build inline header: Segment N | ref | duration | time range
    header_parts = [f"Segment {idx + 1}"]
    if ref_display:
        header_parts.append(ref_display)
    header_parts.append(f"{duration:.1f}s")
    header_parts.append(timestamp)
    header_text = " | ".join(header_parts)

    html = f'''
    <div class="segment-card {confidence_class}" data-duration="{duration:.3f}" data-segment-idx="{idx}" data-matched-ref="{seg.matched_ref or ''}" data-start-time="{seg.start_time:.4f}" data-end-time="{seg.end_time:.4f}">
        <div class="segment-header">
            <div class="segment-title">{header_text}</div>
            <div class="segment-badges">
                {underseg_badge}
                {confidence_badge}
                {missing_badge}
            </div>
        </div>

        {audio_html}

        <div class="segment-text">
            {text_html}
        </div>

        {error_html}
    </div>
    '''
    return html
359
+
360
+
361
def render_segments(segments: list, audio_int16: np.ndarray = None, sample_rate: int = 0, segment_dir: Path = None) -> str:
    """Build the HTML for the whole segment list, including the summary header.

    The output is one ``segments-container`` div holding: a "Found N segments"
    line, optional review summaries (medium/low confidence, missing words,
    potentially undersegmented), and one card per segment.

    Args:
        segments: List of SegmentInfo objects
        audio_int16: Full audio as int16 array for writing per-segment WAV files
        sample_rate: Audio sample rate in Hz
        segment_dir: Directory that receives ``full.wav`` and is handed to
            ``render_segment_card`` for the per-segment files

    Returns:
        The assembled HTML, or a "No segments detected" placeholder div when
        ``segments`` is empty.
    """
    if not segments:
        return '<div class="no-segments">No segments detected</div>'

    def _capped(labels: list) -> str:
        """Comma-join labels, summarising anything past the display limit."""
        if len(labels) <= REVIEW_SUMMARY_MAX_SEGMENTS:
            return ", ".join(labels)
        hidden = len(labels) - REVIEW_SUMMARY_MAX_SEGMENTS
        return ", ".join(labels[:REVIEW_SUMMARY_MAX_SEGMENTS]) + f" ... and {hidden} more"

    # Millisecond timestamp used as a per-render key against audio caching.
    render_key = str(int(time.time() * 1000))

    # Full-length WAV for unified megacard playback.
    full_audio_url = ""
    if audio_int16 is not None and sample_rate > 0 and segment_dir:
        full_path = segment_dir / "full.wav"
        with wave.open(str(full_path), 'wb') as wav_out:
            wav_out.setnchannels(1)
            wav_out.setsampwidth(2)
            wav_out.setframerate(sample_rate)
            wav_out.writeframes(audio_int16.tobytes())
        full_audio_url = f"/gradio_api/file={full_path}"

    # Segment numbers are 1-based everywhere in the summary.
    numbered = list(enumerate(segments, start=1))

    summary = [f'<div class="segments-header">Found {len(segments)} segments</div>']

    # Medium + low confidence segments share one colour-coded list.
    med_numbers = {n for n, s in numbered if CONFIDENCE_MED <= s.match_score < CONFIDENCE_HIGH}
    low_numbers = {n for n, s in numbered if s.match_score < CONFIDENCE_MED}
    review_numbers = sorted(med_numbers | low_numbers)
    if review_numbers:
        review_spans = []
        for n in review_numbers:
            css = "segment-low-text" if n in low_numbers else "segment-med-text"
            review_spans.append(f'<span class="{css}">{n}</span>')
        summary.append(
            '<div class="segments-review-summary">'
            f'Needs review: {len(review_numbers)} (segments {_capped(review_spans)})'
            '</div>'
        )

    missing_numbers = [n for n, s in numbered if s.has_missing_words]
    if missing_numbers:
        # Group consecutive segment numbers into pairs (gaps always flag both neighbors)
        pair_labels = []
        pos = 0
        total = len(missing_numbers)
        while pos < total:
            current = missing_numbers[pos]
            has_neighbor = pos + 1 < total and missing_numbers[pos + 1] == current + 1
            if has_neighbor:
                pair_labels.append(f"{current}/{missing_numbers[pos + 1]}")
                pos += 2
            else:
                pair_labels.append(str(current))
                pos += 1
        summary.append(
            '<div class="segments-review-summary">'
            f'Segments with missing words: <span class="segment-low-text">{len(pair_labels)} (segments {_capped(pair_labels)})</span>'
            '</div>'
        )

    underseg_numbers = [n for n, s in numbered if s.potentially_undersegmented]
    if underseg_numbers:
        underseg_labels = [str(n) for n in underseg_numbers]
        summary.append(
            '<div class="segments-review-summary">'
            f'Potentially undersegmented: <span class="segment-underseg-text">{len(underseg_numbers)} (segments {_capped(underseg_labels)})</span>'
            '</div>'
        )

    container_open = f'<div class="segments-container" data-render-key="{render_key}" data-full-audio="{full_audio_url}">'

    cards = []
    for idx, seg in enumerate(segments):
        # The first AUDIO_PRELOAD_COUNT cards get inline audio and preload="auto".
        inline = idx < AUDIO_PRELOAD_COUNT
        cards.append(
            render_segment_card(
                seg, idx, audio_int16, sample_rate, render_key, segment_dir,
                audio_preload="auto" if inline else "metadata",
                audio_inline=inline,
            )
        )

    return "\n".join([container_open, "\n".join(summary), *cards, '</div>'])
471
+
472
+
473
def encode_segment_audio(
    audio_int16: np.ndarray, sample_rate: int,
    start_time: float, end_time: float,
    segment_dir: Path, segment_idx: int,
    inline: bool = False,
) -> str:
    """Write a segment's audio slice as a WAV file and return a src URL.

    The slice is written as mono, 16-bit PCM to
    ``segment_dir / f"seg_{segment_idx}.wav"``.

    Args:
        audio_int16: Full audio already converted to int16 (avoids per-segment conversion).
        sample_rate: Sample rate in Hz.
        start_time: Segment start in seconds.
        end_time: Segment end in seconds.
        segment_dir: Directory to write the WAV file into.
        segment_idx: Segment index (used for filename).
        inline: If True, return a base64 data URI instead of a file URL.

    Returns a ``data:`` URI (inline) or ``/gradio_api/file=`` URL.
    """
    total_samples = len(audio_int16)

    # Clamp both bounds into [0, total_samples]. Without this a negative
    # time would become a negative index, which NumPy interprets as an
    # offset from the END of the array and silently yields the wrong slice.
    start_sample = min(max(int(start_time * sample_rate), 0), total_samples)
    end_sample = min(max(int(end_time * sample_rate), start_sample), total_samples)
    segment_audio = audio_int16[start_sample:end_sample]

    # Always write WAV to disk (needed by MFA timestamp computation)
    path = segment_dir / f"seg_{segment_idx}.wav"
    with wave.open(str(path), 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(sample_rate)
        wf.writeframes(segment_audio.tobytes())

    if inline:
        # Re-read the finished file so the data URI includes the WAV header.
        with open(path, 'rb') as f:
            b64 = base64.b64encode(f.read()).decode('ascii')
        return f"data:audio/wav;base64,{b64}"

    return f"/gradio_api/file={path}"
510
+
511
+
512
def is_end_of_verse(matched_ref: str) -> bool:
    """
    Check if a reference ends at the last word of a verse.
    Expects formats like "2:255:1-2:255:5" or "2:255:5".

    Args:
        matched_ref: A single ``surah:ayah:word`` ref or a ``start-end`` range;
            only the end of a range is examined.

    Returns:
        True when the end word number is at or past the verse's recorded word
        count. False for empty/malformed refs, for a surah or ayah with no
        recorded word count, and when any error occurs during the check.
    """
    if not matched_ref or ":" not in matched_ref:
        return False

    try:
        # Take the end part of the range (or the single ref)
        end_ref = matched_ref.split("-")[-1]
        parts = end_ref.split(":")
        if len(parts) < 3:
            return False

        surah = int(parts[0])
        ayah = int(parts[1])
        word = int(parts[2])

        num_words = _load_verse_word_counts().get(surah, {}).get(ayah, 0)
        # An unknown verse has a count of 0; without the ``> 0`` guard every
        # word number would compare as "at the end" of such a verse.
        return num_words > 0 and word >= num_words
    except Exception as e:
        # Deliberately best-effort: a bad ref or a failure loading the word
        # counts is reported and treated as "not end of verse".
        print(f"Error checking end of verse: {e}")

    return False