Spaces:
Running on Zero
Running on Zero
Update dev tools
Browse files- src/mfa.py +74 -58
- src/ui/dev_tools.py +348 -26
- src/ui/event_wiring.py +33 -4
- src/ui/interface.py +15 -13
- src/ui/segments.py +2 -2
src/mfa.py
CHANGED
|
@@ -683,16 +683,81 @@ def compute_mfa_timestamps(current_html, json_output, segment_dir, cached_log_ro
|
|
| 683 |
)
|
| 684 |
raise
|
| 685 |
|
| 686 |
-
|
| 687 |
-
|
|
|
|
| 688 |
|
| 689 |
-
#
|
| 690 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 691 |
|
| 692 |
-
#
|
| 693 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 694 |
|
| 695 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 696 |
|
| 697 |
# Inject timestamps into word spans, using segment boundaries to determine result_idx
|
| 698 |
seg_boundaries = []
|
|
@@ -754,8 +819,6 @@ def compute_mfa_timestamps(current_html, json_output, segment_dir, cached_log_ro
|
|
| 754 |
html = re.sub(r'(<button class="animate-btn"[^>]*?)\s+disabled(?:="[^"]*")?', r'\1', html)
|
| 755 |
|
| 756 |
# Stamp char spans with MFA letter timestamps
|
| 757 |
-
import unicodedata
|
| 758 |
-
|
| 759 |
def _stamp_chars_with_mfa(word_m):
|
| 760 |
word_open = word_m.group(1)
|
| 761 |
word_abs_start = float(word_m.group(2))
|
|
@@ -873,57 +936,10 @@ def compute_mfa_timestamps(current_html, json_output, segment_dir, cached_log_ro
|
|
| 873 |
|
| 874 |
print(f"[MFA_TS] Done — injected timestamps for {len(word_timestamps)} words")
|
| 875 |
|
| 876 |
-
#
|
| 877 |
-
if cached_log_row is not None:
|
| 878 |
-
try:
|
| 879 |
-
import json as _json
|
| 880 |
-
from src.core.usage_logger import update_word_timestamps
|
| 881 |
-
_ts_log = []
|
| 882 |
-
_char_ts_log = []
|
| 883 |
-
for result in results:
|
| 884 |
-
if result.get("status") != "ok":
|
| 885 |
-
continue
|
| 886 |
-
_ts_log.append({
|
| 887 |
-
"ref": result.get("ref", ""),
|
| 888 |
-
"words": [
|
| 889 |
-
{"word": w.get("word", ""), "start": round(w["start"], 4), "end": round(w["end"], 4)}
|
| 890 |
-
for w in result.get("words", []) if w.get("start") is not None and w.get("end") is not None
|
| 891 |
-
],
|
| 892 |
-
})
|
| 893 |
-
_char_ts_log.append({
|
| 894 |
-
"ref": result.get("ref", ""),
|
| 895 |
-
"words": [
|
| 896 |
-
{
|
| 897 |
-
"word": w.get("word", ""),
|
| 898 |
-
"location": w.get("location", ""),
|
| 899 |
-
"letters": [
|
| 900 |
-
{"char": lt.get("char", ""), "start": round(lt["start"], 4), "end": round(lt["end"], 4)}
|
| 901 |
-
for lt in w.get("letters", []) if lt.get("start") is not None and lt.get("end") is not None
|
| 902 |
-
],
|
| 903 |
-
}
|
| 904 |
-
for w in result.get("words", []) if w.get("letters")
|
| 905 |
-
],
|
| 906 |
-
})
|
| 907 |
-
update_word_timestamps(
|
| 908 |
-
cached_log_row,
|
| 909 |
-
_json.dumps(_ts_log),
|
| 910 |
-
_json.dumps(_char_ts_log) if any(entry["words"] for entry in _char_ts_log) else None,
|
| 911 |
-
)
|
| 912 |
-
except Exception as e:
|
| 913 |
-
print(f"[USAGE_LOG] Failed to log word timestamps: {e}")
|
| 914 |
-
|
| 915 |
-
# Build enriched JSON using shared helper (UI always includes letters)
|
| 916 |
enriched_json = _build_enriched_json(
|
| 917 |
segments, results, seg_to_result_idx,
|
| 918 |
word_timestamps, letter_timestamps, "words+chars",
|
| 919 |
)
|
| 920 |
|
| 921 |
-
|
| 922 |
-
animate_all_btn_html = '<button class="animate-all-btn">Animate All</button>'
|
| 923 |
-
yield (
|
| 924 |
-
html,
|
| 925 |
-
gr.update(visible=False),
|
| 926 |
-
gr.update(value=animate_all_btn_html, visible=True),
|
| 927 |
-
gr.update(visible=False),
|
| 928 |
-
enriched_json,
|
| 929 |
-
)
|
|
|
|
| 683 |
)
|
| 684 |
raise
|
| 685 |
|
| 686 |
+
html, enriched_json = inject_timestamps_into_html(
|
| 687 |
+
current_html, segments, results, seg_to_result_idx, segment_dir
|
| 688 |
+
)
|
| 689 |
|
| 690 |
+
# Log word and char timestamps to usage logger
|
| 691 |
+
if cached_log_row is not None:
|
| 692 |
+
try:
|
| 693 |
+
import json as _json
|
| 694 |
+
from src.core.usage_logger import update_word_timestamps
|
| 695 |
+
_ts_log = []
|
| 696 |
+
_char_ts_log = []
|
| 697 |
+
for result in results:
|
| 698 |
+
if result.get("status") != "ok":
|
| 699 |
+
continue
|
| 700 |
+
_ts_log.append({
|
| 701 |
+
"ref": result.get("ref", ""),
|
| 702 |
+
"words": [
|
| 703 |
+
{"word": w.get("word", ""), "start": round(w["start"], 4), "end": round(w["end"], 4)}
|
| 704 |
+
for w in result.get("words", []) if w.get("start") is not None and w.get("end") is not None
|
| 705 |
+
],
|
| 706 |
+
})
|
| 707 |
+
_char_ts_log.append({
|
| 708 |
+
"ref": result.get("ref", ""),
|
| 709 |
+
"words": [
|
| 710 |
+
{
|
| 711 |
+
"word": w.get("word", ""),
|
| 712 |
+
"location": w.get("location", ""),
|
| 713 |
+
"letters": [
|
| 714 |
+
{"char": lt.get("char", ""), "start": round(lt["start"], 4), "end": round(lt["end"], 4)}
|
| 715 |
+
for lt in w.get("letters", []) if lt.get("start") is not None and lt.get("end") is not None
|
| 716 |
+
],
|
| 717 |
+
}
|
| 718 |
+
for w in result.get("words", []) if w.get("letters")
|
| 719 |
+
],
|
| 720 |
+
})
|
| 721 |
+
update_word_timestamps(
|
| 722 |
+
cached_log_row,
|
| 723 |
+
_json.dumps(_ts_log),
|
| 724 |
+
_json.dumps(_char_ts_log) if any(entry["words"] for entry in _char_ts_log) else None,
|
| 725 |
+
)
|
| 726 |
+
except Exception as e:
|
| 727 |
+
print(f"[USAGE_LOG] Failed to log word timestamps: {e}")
|
| 728 |
|
| 729 |
+
# Final yield: updated HTML, hide progress bar, show Animate All, enriched JSON
|
| 730 |
+
animate_all_btn_html = '<button class="animate-all-btn">Animate All</button>'
|
| 731 |
+
yield (
|
| 732 |
+
html,
|
| 733 |
+
gr.update(visible=False),
|
| 734 |
+
gr.update(value=animate_all_btn_html, visible=True),
|
| 735 |
+
gr.update(visible=False),
|
| 736 |
+
enriched_json,
|
| 737 |
+
)
|
| 738 |
+
|
| 739 |
+
|
| 740 |
+
# ---------------------------------------------------------------------------
|
| 741 |
+
# Reusable HTML timestamp injection (shared by UI generator and Dev tab)
|
| 742 |
+
# ---------------------------------------------------------------------------
|
| 743 |
+
|
| 744 |
+
def inject_timestamps_into_html(current_html, segments, results, seg_to_result_idx, segment_dir):
|
| 745 |
+
"""Inject word and char timestamps into rendered segment HTML.
|
| 746 |
+
|
| 747 |
+
Builds lookups, cross-word groups, extends timestamps, then performs
|
| 748 |
+
regex-based injection of data-start/data-end attributes into word and
|
| 749 |
+
char spans. Reusable by both the main MFA flow and the Dev tab
|
| 750 |
+
log-based flow.
|
| 751 |
|
| 752 |
+
Returns (enriched_html, enriched_json).
|
| 753 |
+
"""
|
| 754 |
+
import re
|
| 755 |
+
import unicodedata
|
| 756 |
+
|
| 757 |
+
# Build timestamp lookups
|
| 758 |
+
word_timestamps, letter_timestamps, word_to_all_results = _build_timestamp_lookups(results)
|
| 759 |
+
crossword_groups = _build_crossword_groups(results, letter_timestamps)
|
| 760 |
+
_extend_word_timestamps(word_timestamps, segments, seg_to_result_idx, results, segment_dir)
|
| 761 |
|
| 762 |
# Inject timestamps into word spans, using segment boundaries to determine result_idx
|
| 763 |
seg_boundaries = []
|
|
|
|
| 819 |
html = re.sub(r'(<button class="animate-btn"[^>]*?)\s+disabled(?:="[^"]*")?', r'\1', html)
|
| 820 |
|
| 821 |
# Stamp char spans with MFA letter timestamps
|
|
|
|
|
|
|
| 822 |
def _stamp_chars_with_mfa(word_m):
|
| 823 |
word_open = word_m.group(1)
|
| 824 |
word_abs_start = float(word_m.group(2))
|
|
|
|
| 936 |
|
| 937 |
print(f"[MFA_TS] Done — injected timestamps for {len(word_timestamps)} words")
|
| 938 |
|
| 939 |
+
# Build enriched JSON (UI always includes letters)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 940 |
enriched_json = _build_enriched_json(
|
| 941 |
segments, results, seg_to_result_idx,
|
| 942 |
word_timestamps, letter_timestamps, "words+chars",
|
| 943 |
)
|
| 944 |
|
| 945 |
+
return html, enriched_json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ui/dev_tools.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
| 2 |
|
| 3 |
import json
|
| 4 |
import os
|
|
|
|
| 5 |
import uuid
|
| 6 |
from datetime import datetime, timezone
|
| 7 |
from pathlib import Path
|
|
@@ -103,6 +104,9 @@ def build_dev_tab_ui(c):
|
|
| 103 |
c.dev_sort = gr.Dropdown(
|
| 104 |
choices=["Newest", "Duration", "Failures"], value="Newest", label="Sort", scale=1,
|
| 105 |
)
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
c.dev_table = gr.Dataframe(
|
| 108 |
headers=["#", "Time", "Surah", "Duration", "Segs", "Model", "Device",
|
|
@@ -114,11 +118,23 @@ def build_dev_tab_ui(c):
|
|
| 114 |
wrap=True,
|
| 115 |
)
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
c.dev_detail_html = gr.HTML(value="", label="Log Detail")
|
| 118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
# State
|
| 120 |
c.dev_all_rows = gr.State(value=[])
|
| 121 |
c.dev_filtered_indices = gr.State(value=[])
|
|
|
|
|
|
|
| 122 |
|
| 123 |
|
| 124 |
# ── Row extraction ─────────────────────────────────────────────────────
|
|
@@ -151,6 +167,8 @@ def _row_to_dict(row) -> dict:
|
|
| 151 |
"min_speech_ms": row.get("min_speech_ms"),
|
| 152 |
"pad_ms": row.get("pad_ms"),
|
| 153 |
"segments": row.get("segments"),
|
|
|
|
|
|
|
| 154 |
"resegmented": row.get("resegmented"),
|
| 155 |
"retranscribed": row.get("retranscribed"),
|
| 156 |
"error": row.get("error"),
|
|
@@ -236,7 +254,7 @@ def load_logs_handler():
|
|
| 236 |
return rows, indices, status, table_data
|
| 237 |
|
| 238 |
|
| 239 |
-
def filter_and_sort_handler(all_rows, device, model, status_filter, sort_by):
|
| 240 |
"""Filter and sort cached rows, return new table + index mapping."""
|
| 241 |
if not all_rows:
|
| 242 |
return [], gr.update()
|
|
@@ -244,7 +262,25 @@ def filter_and_sort_handler(all_rows, device, model, status_filter, sort_by):
|
|
| 244 |
surah_names = _load_surah_names()
|
| 245 |
indices = []
|
| 246 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
for i, row in enumerate(all_rows):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
# Device filter
|
| 249 |
if device != "All":
|
| 250 |
row_device = (row.get("device") or "").lower()
|
|
@@ -282,14 +318,144 @@ def filter_and_sort_handler(all_rows, device, model, status_filter, sort_by):
|
|
| 282 |
return indices, table_data
|
| 283 |
|
| 284 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
def select_log_row_handler(all_rows, filtered_indices, evt: gr.SelectData):
|
| 286 |
-
"""When a table row is clicked, download audio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
if not all_rows or not filtered_indices:
|
| 288 |
-
return
|
| 289 |
|
| 290 |
display_idx = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
|
| 291 |
if display_idx < 0 or display_idx >= len(filtered_indices):
|
| 292 |
-
return
|
| 293 |
|
| 294 |
row_idx = filtered_indices[display_idx]
|
| 295 |
row = all_rows[row_idx]
|
|
@@ -300,10 +466,51 @@ def select_log_row_handler(all_rows, filtered_indices, evt: gr.SelectData):
|
|
| 300 |
# Build summary HTML
|
| 301 |
summary_html = _build_summary_html(row, surah_names)
|
| 302 |
|
| 303 |
-
#
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
|
| 308 |
|
| 309 |
# ── Summary HTML builder ───────────────────────────────────────────────
|
|
@@ -385,25 +592,30 @@ def _build_summary_html(row, surah_names) -> str:
|
|
| 385 |
|
| 386 |
# ── Segment reconstruction from log ───────────────────────────────────
|
| 387 |
|
| 388 |
-
def _build_segments_from_log(row, audio_id)
|
| 389 |
-
"""Build segment cards from the log's segments JSON, downloading audio on demand.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 390 |
segments_str = row.get("segments")
|
|
|
|
| 391 |
if not segments_str:
|
| 392 |
-
return
|
| 393 |
|
| 394 |
try:
|
| 395 |
runs = json.loads(segments_str)
|
| 396 |
except (json.JSONDecodeError, TypeError):
|
| 397 |
-
return '<div style="color: #999; padding: 20px;">Could not parse segments JSON.</div>'
|
| 398 |
|
| 399 |
if not runs or not isinstance(runs, list):
|
| 400 |
-
return '<div style="color: #999; padding: 20px;">Empty segment runs.</div>'
|
| 401 |
|
| 402 |
# Use the last run (most recent alignment pass)
|
| 403 |
last_run = runs[-1]
|
| 404 |
seg_list = last_run.get("segments", [])
|
| 405 |
if not seg_list:
|
| 406 |
-
return '<div style="color: #999; padding: 20px;">No segments in last run.</div>'
|
| 407 |
|
| 408 |
# Try to download audio for this specific row
|
| 409 |
audio_int16 = None
|
|
@@ -415,13 +627,14 @@ def _build_segments_from_log(row, audio_id) -> str:
|
|
| 415 |
except Exception as e:
|
| 416 |
print(f"[dev_tools] Audio download failed: {e}")
|
| 417 |
|
| 418 |
-
# Build SegmentInfo objects and
|
| 419 |
from src.core.segment_types import SegmentInfo
|
| 420 |
from src.alignment.special_segments import ALL_SPECIAL_REFS, SPECIAL_TEXT
|
| 421 |
from src.ui.segments import render_segments, get_text_with_markers, check_undersegmented
|
| 422 |
|
| 423 |
segments = []
|
| 424 |
-
|
|
|
|
| 425 |
ref = seg_data.get("ref", "")
|
| 426 |
confidence = seg_data.get("confidence", 0.0) or 0.0
|
| 427 |
start = seg_data.get("start", 0.0) or 0.0
|
|
@@ -430,10 +643,19 @@ def _build_segments_from_log(row, audio_id) -> str:
|
|
| 430 |
special_type = seg_data.get("special_type", "")
|
| 431 |
duration = end - start
|
| 432 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 433 |
# Reconstruct matched_text
|
| 434 |
matched_text = ""
|
| 435 |
if ref in ALL_SPECIAL_REFS:
|
| 436 |
-
# For known specials, use the constant text
|
| 437 |
if ref in SPECIAL_TEXT:
|
| 438 |
matched_text = SPECIAL_TEXT[ref]
|
| 439 |
elif ref:
|
|
@@ -460,11 +682,25 @@ def _build_segments_from_log(row, audio_id) -> str:
|
|
| 460 |
)
|
| 461 |
segments.append(seg_info)
|
| 462 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 463 |
if not segments:
|
| 464 |
-
return '<div style="color: #999; padding: 20px;">No valid segments to display.</div>'
|
| 465 |
|
| 466 |
-
|
| 467 |
-
segment_dir=segment_dir)
|
|
|
|
| 468 |
|
| 469 |
|
| 470 |
def _download_audio_for_row(audio_id: str):
|
|
@@ -477,7 +713,6 @@ def _download_audio_for_row(audio_id: str):
|
|
| 477 |
raise ValueError("No HF token")
|
| 478 |
|
| 479 |
from datasets import load_dataset
|
| 480 |
-
import librosa
|
| 481 |
|
| 482 |
ds = load_dataset("hetchyy/quran-aligner-logs", token=token,
|
| 483 |
split="train", streaming=True)
|
|
@@ -492,15 +727,15 @@ def _download_audio_for_row(audio_id: str):
|
|
| 492 |
audio_array = audio_data["array"]
|
| 493 |
sr = audio_data["sampling_rate"]
|
| 494 |
|
| 495 |
-
# Resample to 16kHz if needed
|
| 496 |
-
if sr != 16000:
|
| 497 |
-
audio_array = librosa.resample(audio_array, orig_sr=sr, target_sr=16000)
|
| 498 |
-
sr = 16000
|
| 499 |
-
|
| 500 |
# Convert to int16
|
| 501 |
audio_float = np.clip(audio_array, -1.0, 1.0)
|
| 502 |
audio_int16 = (audio_float * 32767).astype(np.int16)
|
| 503 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 504 |
# Create segment directory
|
| 505 |
segment_dir = SEGMENT_AUDIO_DIR / f"dev_{uuid.uuid4().hex[:8]}"
|
| 506 |
segment_dir.mkdir(parents=True, exist_ok=True)
|
|
@@ -508,3 +743,90 @@ def _download_audio_for_row(audio_id: str):
|
|
| 508 |
return audio_int16, sr, segment_dir
|
| 509 |
|
| 510 |
raise ValueError(f"Audio ID '{audio_id}' not found in dataset")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
import json
|
| 4 |
import os
|
| 5 |
+
import shutil
|
| 6 |
import uuid
|
| 7 |
from datetime import datetime, timezone
|
| 8 |
from pathlib import Path
|
|
|
|
| 104 |
c.dev_sort = gr.Dropdown(
|
| 105 |
choices=["Newest", "Duration", "Failures"], value="Newest", label="Sort", scale=1,
|
| 106 |
)
|
| 107 |
+
c.dev_days_filter = gr.Number(
|
| 108 |
+
label="Last N Days", value=None, precision=0, minimum=1, scale=1,
|
| 109 |
+
)
|
| 110 |
|
| 111 |
c.dev_table = gr.Dataframe(
|
| 112 |
headers=["#", "Time", "Surah", "Duration", "Segs", "Model", "Device",
|
|
|
|
| 118 |
wrap=True,
|
| 119 |
)
|
| 120 |
|
| 121 |
+
with gr.Row():
|
| 122 |
+
c.dev_gpu_plot = gr.Plot(label="GPU: Audio Duration vs Processing Time", visible=False)
|
| 123 |
+
c.dev_cpu_plot = gr.Plot(label="CPU: Audio Duration vs Processing Time", visible=False)
|
| 124 |
+
|
| 125 |
c.dev_detail_html = gr.HTML(value="", label="Log Detail")
|
| 126 |
|
| 127 |
+
with gr.Row():
|
| 128 |
+
c.dev_compute_ts_btn = gr.Button("Compute Timestamps", variant="secondary",
|
| 129 |
+
interactive=False, visible=False)
|
| 130 |
+
c.dev_compute_ts_progress = gr.HTML(value="", visible=False)
|
| 131 |
+
c.dev_animate_all_html = gr.HTML(value="", visible=False)
|
| 132 |
+
|
| 133 |
# State
|
| 134 |
c.dev_all_rows = gr.State(value=[])
|
| 135 |
c.dev_filtered_indices = gr.State(value=[])
|
| 136 |
+
c.dev_segment_dir = gr.State(value=None)
|
| 137 |
+
c.dev_json_output = gr.State(value=None)
|
| 138 |
|
| 139 |
|
| 140 |
# ── Row extraction ─────────────────────────────────────────────────────
|
|
|
|
| 167 |
"min_speech_ms": row.get("min_speech_ms"),
|
| 168 |
"pad_ms": row.get("pad_ms"),
|
| 169 |
"segments": row.get("segments"),
|
| 170 |
+
"word_timestamps": row.get("word_timestamps"),
|
| 171 |
+
"char_timestamps": row.get("char_timestamps"),
|
| 172 |
"resegmented": row.get("resegmented"),
|
| 173 |
"retranscribed": row.get("retranscribed"),
|
| 174 |
"error": row.get("error"),
|
|
|
|
| 254 |
return rows, indices, status, table_data
|
| 255 |
|
| 256 |
|
| 257 |
+
def filter_and_sort_handler(all_rows, device, model, status_filter, sort_by, days=None):
|
| 258 |
"""Filter and sort cached rows, return new table + index mapping."""
|
| 259 |
if not all_rows:
|
| 260 |
return [], gr.update()
|
|
|
|
| 262 |
surah_names = _load_surah_names()
|
| 263 |
indices = []
|
| 264 |
|
| 265 |
+
# Compute cutoff for days filter
|
| 266 |
+
cutoff = None
|
| 267 |
+
if days is not None and days > 0:
|
| 268 |
+
from datetime import timedelta
|
| 269 |
+
cutoff = datetime.now(timezone.utc) - timedelta(days=int(days))
|
| 270 |
+
|
| 271 |
for i, row in enumerate(all_rows):
|
| 272 |
+
# Days filter
|
| 273 |
+
if cutoff is not None:
|
| 274 |
+
ts = row.get("timestamp", "")
|
| 275 |
+
try:
|
| 276 |
+
row_dt = datetime.fromisoformat(ts)
|
| 277 |
+
if row_dt.tzinfo is None:
|
| 278 |
+
row_dt = row_dt.replace(tzinfo=timezone.utc)
|
| 279 |
+
if row_dt < cutoff:
|
| 280 |
+
continue
|
| 281 |
+
except (ValueError, TypeError):
|
| 282 |
+
continue
|
| 283 |
+
|
| 284 |
# Device filter
|
| 285 |
if device != "All":
|
| 286 |
row_device = (row.get("device") or "").lower()
|
|
|
|
| 318 |
return indices, table_data
|
| 319 |
|
| 320 |
|
| 321 |
+
def build_profiling_plots_handler(all_rows, filtered_indices):
|
| 322 |
+
"""Build GPU and CPU linear regression scatter plots from filtered data."""
|
| 323 |
+
if not all_rows or not filtered_indices:
|
| 324 |
+
return gr.update(visible=False), gr.update(visible=False)
|
| 325 |
+
|
| 326 |
+
import matplotlib
|
| 327 |
+
matplotlib.use("Agg")
|
| 328 |
+
import matplotlib.pyplot as plt
|
| 329 |
+
|
| 330 |
+
# Collect data points from filtered rows
|
| 331 |
+
gpu_rows = [] # (audio_dur, vad_gpu, asr_gpu, asr_model)
|
| 332 |
+
cpu_rows = []
|
| 333 |
+
|
| 334 |
+
for i in filtered_indices:
|
| 335 |
+
row = all_rows[i]
|
| 336 |
+
audio_dur = row.get("audio_duration_s")
|
| 337 |
+
vad_gpu = row.get("vad_gpu_time")
|
| 338 |
+
asr_gpu = row.get("asr_gpu_time")
|
| 339 |
+
device = (row.get("device") or "").lower()
|
| 340 |
+
asr_model = row.get("asr_model", "")
|
| 341 |
+
|
| 342 |
+
if audio_dur is None or audio_dur <= 0:
|
| 343 |
+
continue
|
| 344 |
+
|
| 345 |
+
entry = (audio_dur, vad_gpu, asr_gpu, asr_model)
|
| 346 |
+
if device in ("cuda", "gpu"):
|
| 347 |
+
gpu_rows.append(entry)
|
| 348 |
+
elif device == "cpu":
|
| 349 |
+
cpu_rows.append(entry)
|
| 350 |
+
|
| 351 |
+
def _build_figure(rows, title):
|
| 352 |
+
"""Build a dual y-axis scatter + regression figure for one device type."""
|
| 353 |
+
if not rows:
|
| 354 |
+
return None
|
| 355 |
+
|
| 356 |
+
# Split series
|
| 357 |
+
vad_x, vad_y = [], []
|
| 358 |
+
asr_base_x, asr_base_y = [], []
|
| 359 |
+
asr_large_x, asr_large_y = [], []
|
| 360 |
+
|
| 361 |
+
for audio_dur, vad_t, asr_t, model in rows:
|
| 362 |
+
if vad_t is not None and vad_t > 0:
|
| 363 |
+
vad_x.append(audio_dur)
|
| 364 |
+
vad_y.append(vad_t)
|
| 365 |
+
if asr_t is not None and asr_t > 0:
|
| 366 |
+
if model == "Base":
|
| 367 |
+
asr_base_x.append(audio_dur)
|
| 368 |
+
asr_base_y.append(asr_t)
|
| 369 |
+
elif model == "Large":
|
| 370 |
+
asr_large_x.append(audio_dur)
|
| 371 |
+
asr_large_y.append(asr_t)
|
| 372 |
+
|
| 373 |
+
if not vad_x and not asr_base_x and not asr_large_x:
|
| 374 |
+
return None
|
| 375 |
+
|
| 376 |
+
fig, ax_vad = plt.subplots(figsize=(7, 4.5))
|
| 377 |
+
ax_asr = ax_vad.twinx()
|
| 378 |
+
|
| 379 |
+
handles, labels = [], []
|
| 380 |
+
|
| 381 |
+
# VAD series (left y-axis, blue)
|
| 382 |
+
if vad_x:
|
| 383 |
+
s = ax_vad.scatter(vad_x, vad_y, color="#4a9eff", alpha=0.5, s=20, zorder=3)
|
| 384 |
+
handles.append(s)
|
| 385 |
+
if len(vad_x) >= 2:
|
| 386 |
+
coeffs = np.polyfit(vad_x, vad_y, 1)
|
| 387 |
+
x_line = np.array([min(vad_x), max(vad_x)])
|
| 388 |
+
y_line = np.polyval(coeffs, x_line)
|
| 389 |
+
line, = ax_vad.plot(x_line, y_line, color="#4a9eff", linewidth=1.5, zorder=4)
|
| 390 |
+
labels.append(f"VAD: y={coeffs[0]:.3f}x+{coeffs[1]:.2f}")
|
| 391 |
+
else:
|
| 392 |
+
labels.append("VAD")
|
| 393 |
+
|
| 394 |
+
# ASR Base series (right y-axis, orange)
|
| 395 |
+
if asr_base_x:
|
| 396 |
+
s = ax_asr.scatter(asr_base_x, asr_base_y, color="#f0ad4e", alpha=0.5, s=20, marker="^", zorder=3)
|
| 397 |
+
handles.append(s)
|
| 398 |
+
if len(asr_base_x) >= 2:
|
| 399 |
+
coeffs = np.polyfit(asr_base_x, asr_base_y, 1)
|
| 400 |
+
x_line = np.array([min(asr_base_x), max(asr_base_x)])
|
| 401 |
+
y_line = np.polyval(coeffs, x_line)
|
| 402 |
+
ax_asr.plot(x_line, y_line, color="#f0ad4e", linewidth=1.5, zorder=4)
|
| 403 |
+
labels.append(f"ASR Base: y={coeffs[0]:.3f}x+{coeffs[1]:.2f}")
|
| 404 |
+
else:
|
| 405 |
+
labels.append("ASR Base")
|
| 406 |
+
|
| 407 |
+
# ASR Large series (right y-axis, red)
|
| 408 |
+
if asr_large_x:
|
| 409 |
+
s = ax_asr.scatter(asr_large_x, asr_large_y, color="#d9534f", alpha=0.5, s=20, marker="s", zorder=3)
|
| 410 |
+
handles.append(s)
|
| 411 |
+
if len(asr_large_x) >= 2:
|
| 412 |
+
coeffs = np.polyfit(asr_large_x, asr_large_y, 1)
|
| 413 |
+
x_line = np.array([min(asr_large_x), max(asr_large_x)])
|
| 414 |
+
y_line = np.polyval(coeffs, x_line)
|
| 415 |
+
ax_asr.plot(x_line, y_line, color="#d9534f", linewidth=1.5, zorder=4)
|
| 416 |
+
labels.append(f"ASR Large: y={coeffs[0]:.3f}x+{coeffs[1]:.2f}")
|
| 417 |
+
else:
|
| 418 |
+
labels.append("ASR Large")
|
| 419 |
+
|
| 420 |
+
ax_vad.set_xlabel("Audio Duration (s)")
|
| 421 |
+
ax_vad.set_ylabel("VAD Time (s)", color="#4a9eff")
|
| 422 |
+
ax_asr.set_ylabel("ASR Time (s)", color="#f0ad4e")
|
| 423 |
+
ax_vad.tick_params(axis="y", labelcolor="#4a9eff")
|
| 424 |
+
ax_asr.tick_params(axis="y", labelcolor="#f0ad4e")
|
| 425 |
+
ax_vad.set_title(title)
|
| 426 |
+
|
| 427 |
+
if handles:
|
| 428 |
+
fig.legend(handles, labels, loc="upper left", bbox_to_anchor=(0.12, 0.88),
|
| 429 |
+
fontsize=8, framealpha=0.8)
|
| 430 |
+
|
| 431 |
+
fig.tight_layout()
|
| 432 |
+
return fig
|
| 433 |
+
|
| 434 |
+
gpu_fig = _build_figure(gpu_rows, "GPU: Audio Duration vs Processing Time")
|
| 435 |
+
cpu_fig = _build_figure(cpu_rows, "CPU: Audio Duration vs Processing Time")
|
| 436 |
+
|
| 437 |
+
gpu_update = gr.update(value=gpu_fig, visible=True) if gpu_fig else gr.update(visible=False)
|
| 438 |
+
cpu_update = gr.update(value=cpu_fig, visible=True) if cpu_fig else gr.update(visible=False)
|
| 439 |
+
|
| 440 |
+
# Close figures to free memory
|
| 441 |
+
plt.close("all")
|
| 442 |
+
|
| 443 |
+
return gpu_update, cpu_update
|
| 444 |
+
|
| 445 |
+
|
| 446 |
def select_log_row_handler(all_rows, filtered_indices, evt: gr.SelectData):
|
| 447 |
+
"""When a table row is clicked, download audio, render segments, inject timestamps if available.
|
| 448 |
+
|
| 449 |
+
Returns 6-tuple: (dev_detail_html, dev_json_output, dev_segment_dir,
|
| 450 |
+
dev_compute_ts_btn, dev_animate_all_html, dev_compute_ts_progress)
|
| 451 |
+
"""
|
| 452 |
+
_empty = ("", None, None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False))
|
| 453 |
if not all_rows or not filtered_indices:
|
| 454 |
+
return _empty
|
| 455 |
|
| 456 |
display_idx = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
|
| 457 |
if display_idx < 0 or display_idx >= len(filtered_indices):
|
| 458 |
+
return _empty
|
| 459 |
|
| 460 |
row_idx = filtered_indices[display_idx]
|
| 461 |
row = all_rows[row_idx]
|
|
|
|
| 466 |
# Build summary HTML
|
| 467 |
summary_html = _build_summary_html(row, surah_names)
|
| 468 |
|
| 469 |
+
# Reconstruct and render segments
|
| 470 |
+
html, json_segments, segment_dir = _build_segments_from_log(row, audio_id)
|
| 471 |
+
html = summary_html + html
|
| 472 |
+
|
| 473 |
+
# Check if timestamps exist in the log
|
| 474 |
+
has_ts = bool(row.get("word_timestamps"))
|
| 475 |
+
|
| 476 |
+
if has_ts and json_segments:
|
| 477 |
+
try:
|
| 478 |
+
from src.mfa import inject_timestamps_into_html
|
| 479 |
+
|
| 480 |
+
results = _log_timestamps_to_mfa_results(
|
| 481 |
+
row.get("word_timestamps"), row.get("char_timestamps")
|
| 482 |
+
)
|
| 483 |
+
seg_to_result_idx = _build_seg_to_result_idx_from_log(json_segments, results)
|
| 484 |
+
enriched_html, enriched_json = inject_timestamps_into_html(
|
| 485 |
+
html, json_segments, results, seg_to_result_idx,
|
| 486 |
+
str(segment_dir) if segment_dir else None,
|
| 487 |
+
)
|
| 488 |
+
animate_btn = '<button class="animate-all-btn">Animate All</button>'
|
| 489 |
+
return (
|
| 490 |
+
enriched_html,
|
| 491 |
+
enriched_json,
|
| 492 |
+
str(segment_dir) if segment_dir else None,
|
| 493 |
+
gr.update(visible=False, interactive=False),
|
| 494 |
+
gr.update(value=animate_btn, visible=True),
|
| 495 |
+
gr.update(visible=False),
|
| 496 |
+
)
|
| 497 |
+
except Exception as e:
|
| 498 |
+
print(f"[dev_tools] Timestamp injection from log failed: {e}")
|
| 499 |
+
import traceback
|
| 500 |
+
traceback.print_exc()
|
| 501 |
+
# Fall through to non-timestamp path
|
| 502 |
+
|
| 503 |
+
# No timestamps — build basic json_output and show Compute Timestamps button
|
| 504 |
+
json_output = {"segments": json_segments} if json_segments else None
|
| 505 |
+
has_audio = segment_dir is not None
|
| 506 |
+
return (
|
| 507 |
+
html,
|
| 508 |
+
json_output,
|
| 509 |
+
str(segment_dir) if segment_dir else None,
|
| 510 |
+
gr.update(visible=has_audio, interactive=has_audio),
|
| 511 |
+
gr.update(visible=False),
|
| 512 |
+
gr.update(visible=False),
|
| 513 |
+
)
|
| 514 |
|
| 515 |
|
| 516 |
# ── Summary HTML builder ───────────────────────────────────────────────
|
|
|
|
| 592 |
|
| 593 |
# ── Segment reconstruction from log ───────────────────────────────────
|
| 594 |
|
| 595 |
+
def _build_segments_from_log(row, audio_id):
|
| 596 |
+
"""Build segment cards from the log's segments JSON, downloading audio on demand.
|
| 597 |
+
|
| 598 |
+
Returns (html, json_segments, segment_dir) where json_segments is a list
|
| 599 |
+
of dicts compatible with the MFA/timestamp pipeline.
|
| 600 |
+
"""
|
| 601 |
segments_str = row.get("segments")
|
| 602 |
+
_empty = ('<div style="color: #999; padding: 20px;">No segment data in this log row.</div>', [], None)
|
| 603 |
if not segments_str:
|
| 604 |
+
return _empty
|
| 605 |
|
| 606 |
try:
|
| 607 |
runs = json.loads(segments_str)
|
| 608 |
except (json.JSONDecodeError, TypeError):
|
| 609 |
+
return ('<div style="color: #999; padding: 20px;">Could not parse segments JSON.</div>', [], None)
|
| 610 |
|
| 611 |
if not runs or not isinstance(runs, list):
|
| 612 |
+
return ('<div style="color: #999; padding: 20px;">Empty segment runs.</div>', [], None)
|
| 613 |
|
| 614 |
# Use the last run (most recent alignment pass)
|
| 615 |
last_run = runs[-1]
|
| 616 |
seg_list = last_run.get("segments", [])
|
| 617 |
if not seg_list:
|
| 618 |
+
return ('<div style="color: #999; padding: 20px;">No segments in last run.</div>', [], None)
|
| 619 |
|
| 620 |
# Try to download audio for this specific row
|
| 621 |
audio_int16 = None
|
|
|
|
| 627 |
except Exception as e:
|
| 628 |
print(f"[dev_tools] Audio download failed: {e}")
|
| 629 |
|
| 630 |
+
# Build SegmentInfo objects and json_segments in parallel
|
| 631 |
from src.core.segment_types import SegmentInfo
|
| 632 |
from src.alignment.special_segments import ALL_SPECIAL_REFS, SPECIAL_TEXT
|
| 633 |
from src.ui.segments import render_segments, get_text_with_markers, check_undersegmented
|
| 634 |
|
| 635 |
segments = []
|
| 636 |
+
json_segments = []
|
| 637 |
+
for seg_idx, seg_data in enumerate(seg_list):
|
| 638 |
ref = seg_data.get("ref", "")
|
| 639 |
confidence = seg_data.get("confidence", 0.0) or 0.0
|
| 640 |
start = seg_data.get("start", 0.0) or 0.0
|
|
|
|
| 643 |
special_type = seg_data.get("special_type", "")
|
| 644 |
duration = end - start
|
| 645 |
|
| 646 |
+
# Parse ref into ref_from/ref_to/special_type
|
| 647 |
+
if ref in ALL_SPECIAL_REFS:
|
| 648 |
+
ref_from, ref_to, parsed_special = "", "", ref
|
| 649 |
+
elif "-" in ref:
|
| 650 |
+
ref_from, ref_to = ref.split("-", 1)
|
| 651 |
+
parsed_special = ""
|
| 652 |
+
else:
|
| 653 |
+
ref_from = ref_to = ref
|
| 654 |
+
parsed_special = ""
|
| 655 |
+
|
| 656 |
# Reconstruct matched_text
|
| 657 |
matched_text = ""
|
| 658 |
if ref in ALL_SPECIAL_REFS:
|
|
|
|
| 659 |
if ref in SPECIAL_TEXT:
|
| 660 |
matched_text = SPECIAL_TEXT[ref]
|
| 661 |
elif ref:
|
|
|
|
| 682 |
)
|
| 683 |
segments.append(seg_info)
|
| 684 |
|
| 685 |
+
json_segments.append({
|
| 686 |
+
"segment": seg_idx + 1,
|
| 687 |
+
"ref_from": ref_from,
|
| 688 |
+
"ref_to": ref_to,
|
| 689 |
+
"time_from": start,
|
| 690 |
+
"time_to": end,
|
| 691 |
+
"confidence": confidence,
|
| 692 |
+
"special_type": parsed_special,
|
| 693 |
+
"matched_text": matched_text,
|
| 694 |
+
"error": error,
|
| 695 |
+
"has_missing_words": has_missing,
|
| 696 |
+
})
|
| 697 |
+
|
| 698 |
if not segments:
|
| 699 |
+
return ('<div style="color: #999; padding: 20px;">No valid segments to display.</div>', [], None)
|
| 700 |
|
| 701 |
+
html = render_segments(segments, audio_int16=audio_int16, sample_rate=sample_rate,
|
| 702 |
+
segment_dir=segment_dir, skip_full_audio=True)
|
| 703 |
+
return html, json_segments, segment_dir
|
| 704 |
|
| 705 |
|
| 706 |
def _download_audio_for_row(audio_id: str):
|
|
|
|
| 713 |
raise ValueError("No HF token")
|
| 714 |
|
| 715 |
from datasets import load_dataset
|
|
|
|
| 716 |
|
| 717 |
ds = load_dataset("hetchyy/quran-aligner-logs", token=token,
|
| 718 |
split="train", streaming=True)
|
|
|
|
| 727 |
audio_array = audio_data["array"]
|
| 728 |
sr = audio_data["sampling_rate"]
|
| 729 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 730 |
# Convert to int16
|
| 731 |
audio_float = np.clip(audio_array, -1.0, 1.0)
|
| 732 |
audio_int16 = (audio_float * 32767).astype(np.int16)
|
| 733 |
|
| 734 |
+
# Clean up old dev segment directories
|
| 735 |
+
for old_dir in SEGMENT_AUDIO_DIR.glob("dev_*"):
|
| 736 |
+
if old_dir.is_dir():
|
| 737 |
+
shutil.rmtree(old_dir, ignore_errors=True)
|
| 738 |
+
|
| 739 |
# Create segment directory
|
| 740 |
segment_dir = SEGMENT_AUDIO_DIR / f"dev_{uuid.uuid4().hex[:8]}"
|
| 741 |
segment_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
| 743 |
return audio_int16, sr, segment_dir
|
| 744 |
|
| 745 |
raise ValueError(f"Audio ID '{audio_id}' not found in dataset")
|
| 746 |
+
|
| 747 |
+
|
| 748 |
+
# ── Log timestamps → MFA results conversion ──────────────────────────
|
| 749 |
+
|
| 750 |
+
def _log_timestamps_to_mfa_results(word_ts_json, char_ts_json):
    """Convert logged timestamp payloads to the MFA results format.

    Log char_timestamps: [{ref, words: [{word, location, letters: [{char, start, end}]}]}]
    Log word_timestamps: [{ref, words: [{word, start, end}]}]
    MFA results format:  [{status: "ok", ref, words: [{word, location, start, end, letters: [...]}]}]

    Args:
        word_ts_json: JSON text (or an already-decoded list) holding the
            word-level log. ``None``, ``""`` and JSON ``null`` mean "no data".
        char_ts_json: Same, for the character-level log.

    Returns:
        A list with one ``{"status": "ok", "ref": ..., "words": [...]}`` dict
        per logged entry. Entries come from the character-level log when it is
        non-empty; otherwise from the word-level log (with empty ``location``
        and ``letters``). An empty list is returned when both logs are empty.

    Raises:
        json.JSONDecodeError: If a non-empty string payload is not valid JSON.
    """

    def _decode(payload):
        # Falsy payloads and JSON ``null`` both mean "nothing was logged".
        if not payload:
            return []
        if isinstance(payload, (str, bytes, bytearray)):
            payload = json.loads(payload)
        return payload or []

    char_ts = _decode(char_ts_json)
    word_ts = _decode(word_ts_json)

    # Word-level (start, end) lookup: {ref: [{word_idx: (start, end)}, ...]}.
    # One inner dict per *occurrence* of a ref, in log order, so a ref that was
    # recited more than once keeps separate timings for each repetition
    # instead of the last repetition overwriting the earlier ones.
    word_lookup = {}
    for entry in word_ts:
        timings = {}
        for widx, w in enumerate(entry.get("words") or []):
            if w.get("start") is not None and w.get("end") is not None:
                timings[widx] = (w["start"], w["end"])
        word_lookup.setdefault(entry.get("ref", ""), []).append(timings)

    results = []

    if char_ts:
        # Primary path: use char_timestamps (has location + letters).
        # NOTE(review): repeated refs are paired with the word log by order of
        # appearance -- assumes both logs list entries in the same order.
        seen = {}  # {ref: number of char_ts entries already consumed}
        for entry in char_ts:
            ref = entry.get("ref", "")
            occurrence = seen.get(ref, 0)
            seen[ref] = occurrence + 1
            per_ref = word_lookup.get(ref, [])
            ref_word_lookup = per_ref[occurrence] if occurrence < len(per_ref) else {}

            words = []
            for widx, w in enumerate(entry.get("words") or []):
                word_start, word_end = ref_word_lookup.get(widx, (None, None))
                letters = w.get("letters") or []
                # Infer word start/end from letters if not in word_timestamps
                if word_start is None and letters:
                    starts = [lt["start"] for lt in letters if lt.get("start") is not None]
                    ends = [lt["end"] for lt in letters if lt.get("end") is not None]
                    if starts and ends:
                        word_start = min(starts)
                        word_end = max(ends)
                words.append({
                    "word": w.get("word", ""),
                    "location": w.get("location", ""),
                    "start": word_start,
                    "end": word_end,
                    "letters": letters,
                })
            results.append({"status": "ok", "ref": ref, "words": words})
    elif word_ts:
        # Fallback: word_timestamps only (no letters)
        for entry in word_ts:
            words = [
                {
                    "word": w.get("word", ""),
                    "location": "",
                    "start": w.get("start"),
                    "end": w.get("end"),
                    "letters": [],
                }
                for w in entry.get("words") or []
            ]
            results.append({"status": "ok", "ref": entry.get("ref", ""), "words": words})

    return results
|
| 809 |
+
|
| 810 |
+
|
| 811 |
+
def _build_seg_to_result_idx_from_log(json_segments, results):
    """Map segment indices to MFA result indices by matching refs.

    Args:
        json_segments: Segment dicts; each is passed to ``_build_mfa_ref`` and
            its 1-based ``"segment"`` number is read to derive the 0-based key.
        results: MFA-format result dicts; only their ``"ref"`` is read.

    Returns:
        ``{segment_index: result_index}`` (both 0-based) for every segment
        whose MFA ref matches a result. Segments for which ``_build_mfa_ref``
        returns ``None``, or whose ref has no result, are omitted.
    """
    from src.mfa import _build_mfa_ref

    # Build ref -> [result indices] lookup, in result order. Keeping every
    # index (not just the last) lets repeated refs map to distinct results.
    ref_to_results = {}
    for i, r in enumerate(results):
        ref = r.get("ref", "")
        if ref:
            ref_to_results.setdefault(ref, []).append(i)

    seg_to_result_idx = {}
    consumed = {}  # {ref: number of segments already matched to that ref}
    for seg in json_segments:
        mfa_ref = _build_mfa_ref(seg)
        if mfa_ref is None:
            continue
        candidates = ref_to_results.get(mfa_ref)
        if not candidates:
            continue
        # NOTE(review): repeated refs are paired by order of appearance --
        # assumes segments and results are listed in the same order.
        nth = consumed.get(mfa_ref, 0)
        consumed[mfa_ref] = nth + 1
        # If there are more segments than results for this ref, reuse the
        # last result (what a plain ref -> index dict would have returned).
        result_idx = candidates[min(nth, len(candidates) - 1)]
        seg_idx = seg.get("segment", 0) - 1
        seg_to_result_idx[seg_idx] = result_idx

    return seg_to_result_idx
|
src/ui/event_wiring.py
CHANGED
|
@@ -505,41 +505,70 @@ def _wire_dev_tab(c):
|
|
| 505 |
"""Wire dev tab event handlers."""
|
| 506 |
from src.ui.dev_tools import (
|
| 507 |
load_logs_handler, filter_and_sort_handler, select_log_row_handler,
|
|
|
|
| 508 |
)
|
| 509 |
|
| 510 |
# Load / Refresh buttons
|
| 511 |
_load_outputs = [c.dev_all_rows, c.dev_filtered_indices, c.dev_status, c.dev_table]
|
| 512 |
|
|
|
|
|
|
|
| 513 |
c.dev_load_btn.click(
|
| 514 |
fn=load_logs_handler,
|
| 515 |
inputs=[],
|
| 516 |
outputs=_load_outputs,
|
| 517 |
api_name=False, show_progress="minimal",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 518 |
)
|
| 519 |
c.dev_refresh_btn.click(
|
| 520 |
fn=load_logs_handler,
|
| 521 |
inputs=[],
|
| 522 |
outputs=_load_outputs,
|
| 523 |
api_name=False, show_progress="minimal",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 524 |
)
|
| 525 |
|
| 526 |
# Filter / Sort changes
|
| 527 |
_filter_inputs = [c.dev_all_rows, c.dev_filter_device, c.dev_filter_model,
|
| 528 |
-
c.dev_filter_status, c.dev_sort]
|
| 529 |
_filter_outputs = [c.dev_filtered_indices, c.dev_table]
|
| 530 |
|
| 531 |
-
for component in [c.dev_filter_device, c.dev_filter_model,
|
|
|
|
| 532 |
component.change(
|
| 533 |
fn=filter_and_sort_handler,
|
| 534 |
inputs=_filter_inputs,
|
| 535 |
outputs=_filter_outputs,
|
| 536 |
api_name=False, show_progress="hidden",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 537 |
)
|
| 538 |
|
| 539 |
-
# Table row selection
|
| 540 |
c.dev_table.select(
|
| 541 |
fn=select_log_row_handler,
|
| 542 |
inputs=[c.dev_all_rows, c.dev_filtered_indices],
|
| 543 |
-
outputs=[c.dev_detail_html
|
|
|
|
| 544 |
api_name=False, show_progress="minimal",
|
| 545 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 505 |
"""Wire dev tab event handlers."""
|
| 506 |
from src.ui.dev_tools import (
|
| 507 |
load_logs_handler, filter_and_sort_handler, select_log_row_handler,
|
| 508 |
+
build_profiling_plots_handler,
|
| 509 |
)
|
| 510 |
|
| 511 |
# Load / Refresh buttons
|
| 512 |
_load_outputs = [c.dev_all_rows, c.dev_filtered_indices, c.dev_status, c.dev_table]
|
| 513 |
|
| 514 |
+
_plot_outputs = [c.dev_gpu_plot, c.dev_cpu_plot]
|
| 515 |
+
|
| 516 |
c.dev_load_btn.click(
|
| 517 |
fn=load_logs_handler,
|
| 518 |
inputs=[],
|
| 519 |
outputs=_load_outputs,
|
| 520 |
api_name=False, show_progress="minimal",
|
| 521 |
+
).then(
|
| 522 |
+
fn=build_profiling_plots_handler,
|
| 523 |
+
inputs=[c.dev_all_rows, c.dev_filtered_indices],
|
| 524 |
+
outputs=_plot_outputs,
|
| 525 |
+
show_progress="hidden",
|
| 526 |
)
|
| 527 |
c.dev_refresh_btn.click(
|
| 528 |
fn=load_logs_handler,
|
| 529 |
inputs=[],
|
| 530 |
outputs=_load_outputs,
|
| 531 |
api_name=False, show_progress="minimal",
|
| 532 |
+
).then(
|
| 533 |
+
fn=build_profiling_plots_handler,
|
| 534 |
+
inputs=[c.dev_all_rows, c.dev_filtered_indices],
|
| 535 |
+
outputs=_plot_outputs,
|
| 536 |
+
show_progress="hidden",
|
| 537 |
)
|
| 538 |
|
| 539 |
# Filter / Sort changes
|
| 540 |
_filter_inputs = [c.dev_all_rows, c.dev_filter_device, c.dev_filter_model,
|
| 541 |
+
c.dev_filter_status, c.dev_sort, c.dev_days_filter]
|
| 542 |
_filter_outputs = [c.dev_filtered_indices, c.dev_table]
|
| 543 |
|
| 544 |
+
for component in [c.dev_filter_device, c.dev_filter_model,
|
| 545 |
+
c.dev_filter_status, c.dev_sort, c.dev_days_filter]:
|
| 546 |
component.change(
|
| 547 |
fn=filter_and_sort_handler,
|
| 548 |
inputs=_filter_inputs,
|
| 549 |
outputs=_filter_outputs,
|
| 550 |
api_name=False, show_progress="hidden",
|
| 551 |
+
).then(
|
| 552 |
+
fn=build_profiling_plots_handler,
|
| 553 |
+
inputs=[c.dev_all_rows, c.dev_filtered_indices],
|
| 554 |
+
outputs=_plot_outputs,
|
| 555 |
+
show_progress="hidden",
|
| 556 |
)
|
| 557 |
|
| 558 |
+
# Table row selection — returns 6-tuple with timestamps + controls
|
| 559 |
c.dev_table.select(
|
| 560 |
fn=select_log_row_handler,
|
| 561 |
inputs=[c.dev_all_rows, c.dev_filtered_indices],
|
| 562 |
+
outputs=[c.dev_detail_html, c.dev_json_output, c.dev_segment_dir,
|
| 563 |
+
c.dev_compute_ts_btn, c.dev_animate_all_html, c.dev_compute_ts_progress],
|
| 564 |
api_name=False, show_progress="minimal",
|
| 565 |
)
|
| 566 |
+
|
| 567 |
+
# Compute Timestamps button — uses same MFA flow as main tab
|
| 568 |
+
c.dev_compute_ts_btn.click(
|
| 569 |
+
fn=compute_mfa_timestamps,
|
| 570 |
+
inputs=[c.dev_detail_html, c.dev_json_output, c.dev_segment_dir],
|
| 571 |
+
outputs=[c.dev_detail_html, c.dev_compute_ts_btn, c.dev_animate_all_html,
|
| 572 |
+
c.dev_compute_ts_progress, c.dev_json_output],
|
| 573 |
+
api_name=False, show_progress="hidden",
|
| 574 |
+
)
|
src/ui/interface.py
CHANGED
|
@@ -42,7 +42,7 @@ def build_interface():
|
|
| 42 |
gr.Markdown("""
|
| 43 |
- Transcribe and split any recitation by pauses within 1-2 minutes
|
| 44 |
- Get precise pause-, verse-, word- and character-level timestamps, exportable as JSON
|
| 45 |
-
- GPU-powered API usage with daily quotas, and unlimited CPU usage
|
| 46 |
- Reliable confidence system to flag uncertain segments and missed words — no silent errors
|
| 47 |
- Robust tolerance to noise, speaker variation and suboptimal audio quality, particularly with the large model
|
| 48 |
- Not intended for incorrect or fragmented recitations; most suited for correct, continuous recitations (repetitions handled)
|
|
@@ -54,9 +54,18 @@ def build_interface():
|
|
| 54 |
with gr.Accordion("\U0001f4e1 API Usage", open=False):
|
| 55 |
gr.Markdown(_api_doc)
|
| 56 |
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
# State components for caching VAD data between runs
|
| 62 |
c.cached_speech_intervals = gr.State(value=None)
|
|
@@ -206,16 +215,9 @@ def _build_animation_settings(c):
|
|
| 206 |
|
| 207 |
|
| 208 |
def _build_right_column(c):
|
| 209 |
-
"""Build the right output column
|
| 210 |
with gr.Column(scale=RIGHT_COLUMN_SCALE):
|
| 211 |
-
|
| 212 |
-
with gr.Tabs():
|
| 213 |
-
with gr.Tab("Results"):
|
| 214 |
-
_build_results_content(c)
|
| 215 |
-
with gr.Tab("Dev"):
|
| 216 |
-
_build_dev_tab(c)
|
| 217 |
-
else:
|
| 218 |
-
_build_results_content(c)
|
| 219 |
|
| 220 |
|
| 221 |
def _build_results_content(c):
|
|
|
|
| 42 |
gr.Markdown("""
|
| 43 |
- Transcribe and split any recitation by pauses within 1-2 minutes
|
| 44 |
- Get precise pause-, verse-, word- and character-level timestamps, exportable as JSON
|
| 45 |
+
- GPU-powered [API usage](https://huggingface.co/spaces/hetchyy/Quran-multi-aligner/blob/main/docs/client_api.md) with daily quotas, and unlimited CPU usage
|
| 46 |
- Reliable confidence system to flag uncertain segments and missed words — no silent errors
|
| 47 |
- Robust tolerance to noise, speaker variation and suboptimal audio quality, particularly with the large model
|
| 48 |
- Not intended for incorrect or fragmented recitations; most suited for correct, continuous recitations (repetitions handled)
|
|
|
|
| 54 |
with gr.Accordion("\U0001f4e1 API Usage", open=False):
|
| 55 |
gr.Markdown(_api_doc)
|
| 56 |
|
| 57 |
+
if DEV_TAB_VISIBLE:
|
| 58 |
+
with gr.Tabs():
|
| 59 |
+
with gr.Tab("Results"):
|
| 60 |
+
with gr.Row(elem_id="main-row"):
|
| 61 |
+
_build_left_column(c)
|
| 62 |
+
_build_right_column(c)
|
| 63 |
+
with gr.Tab("Dev"):
|
| 64 |
+
_build_dev_tab(c)
|
| 65 |
+
else:
|
| 66 |
+
with gr.Row(elem_id="main-row"):
|
| 67 |
+
_build_left_column(c)
|
| 68 |
+
_build_right_column(c)
|
| 69 |
|
| 70 |
# State components for caching VAD data between runs
|
| 71 |
c.cached_speech_intervals = gr.State(value=None)
|
|
|
|
| 215 |
|
| 216 |
|
| 217 |
def _build_right_column(c):
|
| 218 |
+
"""Build the right output column."""
|
| 219 |
with gr.Column(scale=RIGHT_COLUMN_SCALE):
|
| 220 |
+
_build_results_content(c)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
|
| 222 |
|
| 223 |
def _build_results_content(c):
|
src/ui/segments.py
CHANGED
|
@@ -373,7 +373,7 @@ def render_segment_card(seg: SegmentInfo, idx: int, audio_int16: np.ndarray = No
|
|
| 373 |
return html
|
| 374 |
|
| 375 |
|
| 376 |
-
def render_segments(segments: list, audio_int16: np.ndarray = None, sample_rate: int = 0, segment_dir: Path = None) -> str:
|
| 377 |
"""Render all segments as HTML with optional audio players.
|
| 378 |
|
| 379 |
Args:
|
|
@@ -390,7 +390,7 @@ def render_segments(segments: list, audio_int16: np.ndarray = None, sample_rate:
|
|
| 390 |
|
| 391 |
# Write full audio file for unified megacard playback
|
| 392 |
full_audio_url = ""
|
| 393 |
-
if audio_int16 is not None and sample_rate > 0 and segment_dir:
|
| 394 |
full_path = segment_dir / "full.wav"
|
| 395 |
with wave.open(str(full_path), 'wb') as wf:
|
| 396 |
wf.setnchannels(1)
|
|
|
|
| 373 |
return html
|
| 374 |
|
| 375 |
|
| 376 |
+
def render_segments(segments: list, audio_int16: np.ndarray = None, sample_rate: int = 0, segment_dir: Path = None, skip_full_audio: bool = False) -> str:
|
| 377 |
"""Render all segments as HTML with optional audio players.
|
| 378 |
|
| 379 |
Args:
|
|
|
|
| 390 |
|
| 391 |
# Write full audio file for unified megacard playback
|
| 392 |
full_audio_url = ""
|
| 393 |
+
if audio_int16 is not None and sample_rate > 0 and segment_dir and not skip_full_audio:
|
| 394 |
full_path = segment_dir / "full.wav"
|
| 395 |
with wave.open(str(full_path), 'wb') as wf:
|
| 396 |
wf.setnchannels(1)
|