hetchyy committed on
Commit
5df5324
·
1 Parent(s): c431bc9

Update dev tools

Browse files
Files changed (5) hide show
  1. src/mfa.py +74 -58
  2. src/ui/dev_tools.py +348 -26
  3. src/ui/event_wiring.py +33 -4
  4. src/ui/interface.py +15 -13
  5. src/ui/segments.py +2 -2
src/mfa.py CHANGED
@@ -683,16 +683,81 @@ def compute_mfa_timestamps(current_html, json_output, segment_dir, cached_log_ro
683
  )
684
  raise
685
 
686
- # Build timestamp lookups using shared helper
687
- word_timestamps, letter_timestamps, word_to_all_results = _build_timestamp_lookups(results)
 
688
 
689
- # Build cross-word groups using shared helper
690
- crossword_groups = _build_crossword_groups(results, letter_timestamps)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
691
 
692
- # Extend word timestamps using shared helper
693
- _extend_word_timestamps(word_timestamps, segments, seg_to_result_idx, results, segment_dir)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
694
 
695
- # --- HTML injection (UI-only, not shared with API) ---
 
 
 
 
 
 
 
 
696
 
697
  # Inject timestamps into word spans, using segment boundaries to determine result_idx
698
  seg_boundaries = []
@@ -754,8 +819,6 @@ def compute_mfa_timestamps(current_html, json_output, segment_dir, cached_log_ro
754
  html = re.sub(r'(<button class="animate-btn"[^>]*?)\s+disabled(?:="[^"]*")?', r'\1', html)
755
 
756
  # Stamp char spans with MFA letter timestamps
757
- import unicodedata
758
-
759
  def _stamp_chars_with_mfa(word_m):
760
  word_open = word_m.group(1)
761
  word_abs_start = float(word_m.group(2))
@@ -873,57 +936,10 @@ def compute_mfa_timestamps(current_html, json_output, segment_dir, cached_log_ro
873
 
874
  print(f"[MFA_TS] Done — injected timestamps for {len(word_timestamps)} words")
875
 
876
- # Log word and char timestamps to usage logger
877
- if cached_log_row is not None:
878
- try:
879
- import json as _json
880
- from src.core.usage_logger import update_word_timestamps
881
- _ts_log = []
882
- _char_ts_log = []
883
- for result in results:
884
- if result.get("status") != "ok":
885
- continue
886
- _ts_log.append({
887
- "ref": result.get("ref", ""),
888
- "words": [
889
- {"word": w.get("word", ""), "start": round(w["start"], 4), "end": round(w["end"], 4)}
890
- for w in result.get("words", []) if w.get("start") is not None and w.get("end") is not None
891
- ],
892
- })
893
- _char_ts_log.append({
894
- "ref": result.get("ref", ""),
895
- "words": [
896
- {
897
- "word": w.get("word", ""),
898
- "location": w.get("location", ""),
899
- "letters": [
900
- {"char": lt.get("char", ""), "start": round(lt["start"], 4), "end": round(lt["end"], 4)}
901
- for lt in w.get("letters", []) if lt.get("start") is not None and lt.get("end") is not None
902
- ],
903
- }
904
- for w in result.get("words", []) if w.get("letters")
905
- ],
906
- })
907
- update_word_timestamps(
908
- cached_log_row,
909
- _json.dumps(_ts_log),
910
- _json.dumps(_char_ts_log) if any(entry["words"] for entry in _char_ts_log) else None,
911
- )
912
- except Exception as e:
913
- print(f"[USAGE_LOG] Failed to log word timestamps: {e}")
914
-
915
- # Build enriched JSON using shared helper (UI always includes letters)
916
  enriched_json = _build_enriched_json(
917
  segments, results, seg_to_result_idx,
918
  word_timestamps, letter_timestamps, "words+chars",
919
  )
920
 
921
- # Final yield: updated HTML, hide progress bar, show Animate All, enriched JSON
922
- animate_all_btn_html = '<button class="animate-all-btn">Animate All</button>'
923
- yield (
924
- html,
925
- gr.update(visible=False),
926
- gr.update(value=animate_all_btn_html, visible=True),
927
- gr.update(visible=False),
928
- enriched_json,
929
- )
 
683
  )
684
  raise
685
 
686
+ html, enriched_json = inject_timestamps_into_html(
687
+ current_html, segments, results, seg_to_result_idx, segment_dir
688
+ )
689
 
690
+ # Log word and char timestamps to usage logger
691
+ if cached_log_row is not None:
692
+ try:
693
+ import json as _json
694
+ from src.core.usage_logger import update_word_timestamps
695
+ _ts_log = []
696
+ _char_ts_log = []
697
+ for result in results:
698
+ if result.get("status") != "ok":
699
+ continue
700
+ _ts_log.append({
701
+ "ref": result.get("ref", ""),
702
+ "words": [
703
+ {"word": w.get("word", ""), "start": round(w["start"], 4), "end": round(w["end"], 4)}
704
+ for w in result.get("words", []) if w.get("start") is not None and w.get("end") is not None
705
+ ],
706
+ })
707
+ _char_ts_log.append({
708
+ "ref": result.get("ref", ""),
709
+ "words": [
710
+ {
711
+ "word": w.get("word", ""),
712
+ "location": w.get("location", ""),
713
+ "letters": [
714
+ {"char": lt.get("char", ""), "start": round(lt["start"], 4), "end": round(lt["end"], 4)}
715
+ for lt in w.get("letters", []) if lt.get("start") is not None and lt.get("end") is not None
716
+ ],
717
+ }
718
+ for w in result.get("words", []) if w.get("letters")
719
+ ],
720
+ })
721
+ update_word_timestamps(
722
+ cached_log_row,
723
+ _json.dumps(_ts_log),
724
+ _json.dumps(_char_ts_log) if any(entry["words"] for entry in _char_ts_log) else None,
725
+ )
726
+ except Exception as e:
727
+ print(f"[USAGE_LOG] Failed to log word timestamps: {e}")
728
 
729
+ # Final yield: updated HTML, hide progress bar, show Animate All, enriched JSON
730
+ animate_all_btn_html = '<button class="animate-all-btn">Animate All</button>'
731
+ yield (
732
+ html,
733
+ gr.update(visible=False),
734
+ gr.update(value=animate_all_btn_html, visible=True),
735
+ gr.update(visible=False),
736
+ enriched_json,
737
+ )
738
+
739
+
740
+ # ---------------------------------------------------------------------------
741
+ # Reusable HTML timestamp injection (shared by UI generator and Dev tab)
742
+ # ---------------------------------------------------------------------------
743
+
744
+ def inject_timestamps_into_html(current_html, segments, results, seg_to_result_idx, segment_dir):
745
+ """Inject word and char timestamps into rendered segment HTML.
746
+
747
+ Builds lookups, cross-word groups, extends timestamps, then performs
748
+ regex-based injection of data-start/data-end attributes into word and
749
+ char spans. Reusable by both the main MFA flow and the Dev tab
750
+ log-based flow.
751
 
752
+ Returns (enriched_html, enriched_json).
753
+ """
754
+ import re
755
+ import unicodedata
756
+
757
+ # Build timestamp lookups
758
+ word_timestamps, letter_timestamps, word_to_all_results = _build_timestamp_lookups(results)
759
+ crossword_groups = _build_crossword_groups(results, letter_timestamps)
760
+ _extend_word_timestamps(word_timestamps, segments, seg_to_result_idx, results, segment_dir)
761
 
762
  # Inject timestamps into word spans, using segment boundaries to determine result_idx
763
  seg_boundaries = []
 
819
  html = re.sub(r'(<button class="animate-btn"[^>]*?)\s+disabled(?:="[^"]*")?', r'\1', html)
820
 
821
  # Stamp char spans with MFA letter timestamps
 
 
822
  def _stamp_chars_with_mfa(word_m):
823
  word_open = word_m.group(1)
824
  word_abs_start = float(word_m.group(2))
 
936
 
937
  print(f"[MFA_TS] Done — injected timestamps for {len(word_timestamps)} words")
938
 
939
+ # Build enriched JSON (UI always includes letters)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
940
  enriched_json = _build_enriched_json(
941
  segments, results, seg_to_result_idx,
942
  word_timestamps, letter_timestamps, "words+chars",
943
  )
944
 
945
+ return html, enriched_json
 
 
 
 
 
 
 
 
src/ui/dev_tools.py CHANGED
@@ -2,6 +2,7 @@
2
 
3
  import json
4
  import os
 
5
  import uuid
6
  from datetime import datetime, timezone
7
  from pathlib import Path
@@ -103,6 +104,9 @@ def build_dev_tab_ui(c):
103
  c.dev_sort = gr.Dropdown(
104
  choices=["Newest", "Duration", "Failures"], value="Newest", label="Sort", scale=1,
105
  )
 
 
 
106
 
107
  c.dev_table = gr.Dataframe(
108
  headers=["#", "Time", "Surah", "Duration", "Segs", "Model", "Device",
@@ -114,11 +118,23 @@ def build_dev_tab_ui(c):
114
  wrap=True,
115
  )
116
 
 
 
 
 
117
  c.dev_detail_html = gr.HTML(value="", label="Log Detail")
118
 
 
 
 
 
 
 
119
  # State
120
  c.dev_all_rows = gr.State(value=[])
121
  c.dev_filtered_indices = gr.State(value=[])
 
 
122
 
123
 
124
  # ── Row extraction ─────────────────────────────────────────────────────
@@ -151,6 +167,8 @@ def _row_to_dict(row) -> dict:
151
  "min_speech_ms": row.get("min_speech_ms"),
152
  "pad_ms": row.get("pad_ms"),
153
  "segments": row.get("segments"),
 
 
154
  "resegmented": row.get("resegmented"),
155
  "retranscribed": row.get("retranscribed"),
156
  "error": row.get("error"),
@@ -236,7 +254,7 @@ def load_logs_handler():
236
  return rows, indices, status, table_data
237
 
238
 
239
- def filter_and_sort_handler(all_rows, device, model, status_filter, sort_by):
240
  """Filter and sort cached rows, return new table + index mapping."""
241
  if not all_rows:
242
  return [], gr.update()
@@ -244,7 +262,25 @@ def filter_and_sort_handler(all_rows, device, model, status_filter, sort_by):
244
  surah_names = _load_surah_names()
245
  indices = []
246
 
 
 
 
 
 
 
247
  for i, row in enumerate(all_rows):
 
 
 
 
 
 
 
 
 
 
 
 
248
  # Device filter
249
  if device != "All":
250
  row_device = (row.get("device") or "").lower()
@@ -282,14 +318,144 @@ def filter_and_sort_handler(all_rows, device, model, status_filter, sort_by):
282
  return indices, table_data
283
 
284
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
  def select_log_row_handler(all_rows, filtered_indices, evt: gr.SelectData):
286
- """When a table row is clicked, download audio and render segments."""
 
 
 
 
 
287
  if not all_rows or not filtered_indices:
288
- return ""
289
 
290
  display_idx = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
291
  if display_idx < 0 or display_idx >= len(filtered_indices):
292
- return ""
293
 
294
  row_idx = filtered_indices[display_idx]
295
  row = all_rows[row_idx]
@@ -300,10 +466,51 @@ def select_log_row_handler(all_rows, filtered_indices, evt: gr.SelectData):
300
  # Build summary HTML
301
  summary_html = _build_summary_html(row, surah_names)
302
 
303
- # Try to reconstruct and render segments
304
- segments_html = _build_segments_from_log(row, audio_id)
305
-
306
- return summary_html + segments_html
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
 
308
 
309
  # ── Summary HTML builder ───────────────────────────────────────────────
@@ -385,25 +592,30 @@ def _build_summary_html(row, surah_names) -> str:
385
 
386
  # ── Segment reconstruction from log ───────────────────────────────────
387
 
388
- def _build_segments_from_log(row, audio_id) -> str:
389
- """Build segment cards from the log's segments JSON, downloading audio on demand."""
 
 
 
 
390
  segments_str = row.get("segments")
 
391
  if not segments_str:
392
- return '<div style="color: #999; padding: 20px;">No segment data in this log row.</div>'
393
 
394
  try:
395
  runs = json.loads(segments_str)
396
  except (json.JSONDecodeError, TypeError):
397
- return '<div style="color: #999; padding: 20px;">Could not parse segments JSON.</div>'
398
 
399
  if not runs or not isinstance(runs, list):
400
- return '<div style="color: #999; padding: 20px;">Empty segment runs.</div>'
401
 
402
  # Use the last run (most recent alignment pass)
403
  last_run = runs[-1]
404
  seg_list = last_run.get("segments", [])
405
  if not seg_list:
406
- return '<div style="color: #999; padding: 20px;">No segments in last run.</div>'
407
 
408
  # Try to download audio for this specific row
409
  audio_int16 = None
@@ -415,13 +627,14 @@ def _build_segments_from_log(row, audio_id) -> str:
415
  except Exception as e:
416
  print(f"[dev_tools] Audio download failed: {e}")
417
 
418
- # Build SegmentInfo objects and render
419
  from src.core.segment_types import SegmentInfo
420
  from src.alignment.special_segments import ALL_SPECIAL_REFS, SPECIAL_TEXT
421
  from src.ui.segments import render_segments, get_text_with_markers, check_undersegmented
422
 
423
  segments = []
424
- for seg_data in seg_list:
 
425
  ref = seg_data.get("ref", "")
426
  confidence = seg_data.get("confidence", 0.0) or 0.0
427
  start = seg_data.get("start", 0.0) or 0.0
@@ -430,10 +643,19 @@ def _build_segments_from_log(row, audio_id) -> str:
430
  special_type = seg_data.get("special_type", "")
431
  duration = end - start
432
 
 
 
 
 
 
 
 
 
 
 
433
  # Reconstruct matched_text
434
  matched_text = ""
435
  if ref in ALL_SPECIAL_REFS:
436
- # For known specials, use the constant text
437
  if ref in SPECIAL_TEXT:
438
  matched_text = SPECIAL_TEXT[ref]
439
  elif ref:
@@ -460,11 +682,25 @@ def _build_segments_from_log(row, audio_id) -> str:
460
  )
461
  segments.append(seg_info)
462
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463
  if not segments:
464
- return '<div style="color: #999; padding: 20px;">No valid segments to display.</div>'
465
 
466
- return render_segments(segments, audio_int16=audio_int16, sample_rate=sample_rate,
467
- segment_dir=segment_dir)
 
468
 
469
 
470
  def _download_audio_for_row(audio_id: str):
@@ -477,7 +713,6 @@ def _download_audio_for_row(audio_id: str):
477
  raise ValueError("No HF token")
478
 
479
  from datasets import load_dataset
480
- import librosa
481
 
482
  ds = load_dataset("hetchyy/quran-aligner-logs", token=token,
483
  split="train", streaming=True)
@@ -492,15 +727,15 @@ def _download_audio_for_row(audio_id: str):
492
  audio_array = audio_data["array"]
493
  sr = audio_data["sampling_rate"]
494
 
495
- # Resample to 16kHz if needed
496
- if sr != 16000:
497
- audio_array = librosa.resample(audio_array, orig_sr=sr, target_sr=16000)
498
- sr = 16000
499
-
500
  # Convert to int16
501
  audio_float = np.clip(audio_array, -1.0, 1.0)
502
  audio_int16 = (audio_float * 32767).astype(np.int16)
503
 
 
 
 
 
 
504
  # Create segment directory
505
  segment_dir = SEGMENT_AUDIO_DIR / f"dev_{uuid.uuid4().hex[:8]}"
506
  segment_dir.mkdir(parents=True, exist_ok=True)
@@ -508,3 +743,90 @@ def _download_audio_for_row(audio_id: str):
508
  return audio_int16, sr, segment_dir
509
 
510
  raise ValueError(f"Audio ID '{audio_id}' not found in dataset")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  import json
4
  import os
5
+ import shutil
6
  import uuid
7
  from datetime import datetime, timezone
8
  from pathlib import Path
 
104
  c.dev_sort = gr.Dropdown(
105
  choices=["Newest", "Duration", "Failures"], value="Newest", label="Sort", scale=1,
106
  )
107
+ c.dev_days_filter = gr.Number(
108
+ label="Last N Days", value=None, precision=0, minimum=1, scale=1,
109
+ )
110
 
111
  c.dev_table = gr.Dataframe(
112
  headers=["#", "Time", "Surah", "Duration", "Segs", "Model", "Device",
 
118
  wrap=True,
119
  )
120
 
121
+ with gr.Row():
122
+ c.dev_gpu_plot = gr.Plot(label="GPU: Audio Duration vs Processing Time", visible=False)
123
+ c.dev_cpu_plot = gr.Plot(label="CPU: Audio Duration vs Processing Time", visible=False)
124
+
125
  c.dev_detail_html = gr.HTML(value="", label="Log Detail")
126
 
127
+ with gr.Row():
128
+ c.dev_compute_ts_btn = gr.Button("Compute Timestamps", variant="secondary",
129
+ interactive=False, visible=False)
130
+ c.dev_compute_ts_progress = gr.HTML(value="", visible=False)
131
+ c.dev_animate_all_html = gr.HTML(value="", visible=False)
132
+
133
  # State
134
  c.dev_all_rows = gr.State(value=[])
135
  c.dev_filtered_indices = gr.State(value=[])
136
+ c.dev_segment_dir = gr.State(value=None)
137
+ c.dev_json_output = gr.State(value=None)
138
 
139
 
140
  # ── Row extraction ─────────────────────────────────────────────────────
 
167
  "min_speech_ms": row.get("min_speech_ms"),
168
  "pad_ms": row.get("pad_ms"),
169
  "segments": row.get("segments"),
170
+ "word_timestamps": row.get("word_timestamps"),
171
+ "char_timestamps": row.get("char_timestamps"),
172
  "resegmented": row.get("resegmented"),
173
  "retranscribed": row.get("retranscribed"),
174
  "error": row.get("error"),
 
254
  return rows, indices, status, table_data
255
 
256
 
257
+ def filter_and_sort_handler(all_rows, device, model, status_filter, sort_by, days=None):
258
  """Filter and sort cached rows, return new table + index mapping."""
259
  if not all_rows:
260
  return [], gr.update()
 
262
  surah_names = _load_surah_names()
263
  indices = []
264
 
265
+ # Compute cutoff for days filter
266
+ cutoff = None
267
+ if days is not None and days > 0:
268
+ from datetime import timedelta
269
+ cutoff = datetime.now(timezone.utc) - timedelta(days=int(days))
270
+
271
  for i, row in enumerate(all_rows):
272
+ # Days filter
273
+ if cutoff is not None:
274
+ ts = row.get("timestamp", "")
275
+ try:
276
+ row_dt = datetime.fromisoformat(ts)
277
+ if row_dt.tzinfo is None:
278
+ row_dt = row_dt.replace(tzinfo=timezone.utc)
279
+ if row_dt < cutoff:
280
+ continue
281
+ except (ValueError, TypeError):
282
+ continue
283
+
284
  # Device filter
285
  if device != "All":
286
  row_device = (row.get("device") or "").lower()
 
318
  return indices, table_data
319
 
320
 
321
def build_profiling_plots_handler(all_rows, filtered_indices):
    """Build GPU and CPU scatter plots with linear fits from the filtered rows.

    Each plot has audio duration on the x-axis, VAD time on the left y-axis
    and ASR time (split into "Base" and "Large" models) on the right y-axis.

    Args:
        all_rows: List of log-row dicts (reads ``audio_duration_s``,
            ``vad_gpu_time``, ``asr_gpu_time``, ``device``, ``asr_model``).
        filtered_indices: Indices into ``all_rows`` selecting the rows to plot.

    Returns:
        A ``(gpu_update, cpu_update)`` pair of ``gr.update`` objects. Each one
        carries a matplotlib figure and ``visible=True`` when there is data
        for that device type, otherwise just ``visible=False``.
    """
    if not all_rows or not filtered_indices:
        return gr.update(visible=False), gr.update(visible=False)

    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt

    vad_color, base_color, large_color = "#4a9eff", "#f0ad4e", "#d9534f"

    # Collect (audio_dur, vad_time, asr_time, asr_model) tuples per device type
    gpu_rows = []
    cpu_rows = []
    for i in filtered_indices:
        row = all_rows[i]
        audio_dur = row.get("audio_duration_s")
        if audio_dur is None or audio_dur <= 0:
            continue

        entry = (
            audio_dur,
            row.get("vad_gpu_time"),
            row.get("asr_gpu_time"),
            row.get("asr_model", ""),
        )
        device = (row.get("device") or "").lower()
        if device in ("cuda", "gpu"):
            gpu_rows.append(entry)
        elif device == "cpu":
            cpu_rows.append(entry)

    def _plot_series(ax, xs, ys, name, color, marker, handles, labels):
        """Scatter one series on ``ax`` and overlay a least-squares line when possible."""
        if not xs:
            return
        handles.append(
            ax.scatter(xs, ys, color=color, alpha=0.5, s=20, marker=marker, zorder=3)
        )
        # A line fit needs at least two distinct x values; with identical
        # durations the fit is rank-deficient and the drawn segment is a point.
        if len(set(xs)) >= 2:
            slope, intercept = np.polyfit(xs, ys, 1)
            x_line = np.array([min(xs), max(xs)])
            y_line = np.polyval((slope, intercept), x_line)
            ax.plot(x_line, y_line, color=color, linewidth=1.5, zorder=4)
            # "+.2f" prints the sign itself, so a negative intercept reads "x-0.12"
            labels.append(f"{name}: y={slope:.3f}x{intercept:+.2f}")
        else:
            labels.append(name)

    def _build_figure(rows, title):
        """Build a dual y-axis scatter + regression figure for one device type."""
        if not rows:
            return None

        vad_x, vad_y = [], []
        asr_base_x, asr_base_y = [], []
        asr_large_x, asr_large_y = [], []

        for audio_dur, vad_t, asr_t, model in rows:
            if vad_t is not None and vad_t > 0:
                vad_x.append(audio_dur)
                vad_y.append(vad_t)
            if asr_t is not None and asr_t > 0:
                if model == "Base":
                    asr_base_x.append(audio_dur)
                    asr_base_y.append(asr_t)
                elif model == "Large":
                    asr_large_x.append(audio_dur)
                    asr_large_y.append(asr_t)

        if not (vad_x or asr_base_x or asr_large_x):
            return None

        fig, ax_vad = plt.subplots(figsize=(7, 4.5))
        ax_asr = ax_vad.twinx()

        handles, labels = [], []
        # marker=None keeps matplotlib's default scatter marker for the VAD series
        _plot_series(ax_vad, vad_x, vad_y, "VAD", vad_color, None, handles, labels)
        _plot_series(ax_asr, asr_base_x, asr_base_y, "ASR Base", base_color, "^", handles, labels)
        _plot_series(ax_asr, asr_large_x, asr_large_y, "ASR Large", large_color, "s", handles, labels)

        ax_vad.set_xlabel("Audio Duration (s)")
        ax_vad.set_ylabel("VAD Time (s)", color=vad_color)
        ax_asr.set_ylabel("ASR Time (s)", color=base_color)
        ax_vad.tick_params(axis="y", labelcolor=vad_color)
        ax_asr.tick_params(axis="y", labelcolor=base_color)
        ax_vad.set_title(title)

        if handles:
            fig.legend(handles, labels, loc="upper left", bbox_to_anchor=(0.12, 0.88),
                       fontsize=8, framealpha=0.8)

        fig.tight_layout()
        return fig

    gpu_fig = _build_figure(gpu_rows, "GPU: Audio Duration vs Processing Time")
    cpu_fig = _build_figure(cpu_rows, "CPU: Audio Duration vs Processing Time")

    gpu_update = gr.update(value=gpu_fig, visible=True) if gpu_fig is not None else gr.update(visible=False)
    cpu_update = gr.update(value=cpu_fig, visible=True) if cpu_fig is not None else gr.update(visible=False)

    # Deregister only the figures created here so pyplot does not keep them
    # alive; figures owned by other code are left untouched.
    for fig in (gpu_fig, cpu_fig):
        if fig is not None:
            plt.close(fig)

    return gpu_update, cpu_update
444
+
445
+
446
  def select_log_row_handler(all_rows, filtered_indices, evt: gr.SelectData):
447
+ """When a table row is clicked, download audio, render segments, inject timestamps if available.
448
+
449
+ Returns 6-tuple: (dev_detail_html, dev_json_output, dev_segment_dir,
450
+ dev_compute_ts_btn, dev_animate_all_html, dev_compute_ts_progress)
451
+ """
452
+ _empty = ("", None, None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False))
453
  if not all_rows or not filtered_indices:
454
+ return _empty
455
 
456
  display_idx = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
457
  if display_idx < 0 or display_idx >= len(filtered_indices):
458
+ return _empty
459
 
460
  row_idx = filtered_indices[display_idx]
461
  row = all_rows[row_idx]
 
466
  # Build summary HTML
467
  summary_html = _build_summary_html(row, surah_names)
468
 
469
+ # Reconstruct and render segments
470
+ html, json_segments, segment_dir = _build_segments_from_log(row, audio_id)
471
+ html = summary_html + html
472
+
473
+ # Check if timestamps exist in the log
474
+ has_ts = bool(row.get("word_timestamps"))
475
+
476
+ if has_ts and json_segments:
477
+ try:
478
+ from src.mfa import inject_timestamps_into_html
479
+
480
+ results = _log_timestamps_to_mfa_results(
481
+ row.get("word_timestamps"), row.get("char_timestamps")
482
+ )
483
+ seg_to_result_idx = _build_seg_to_result_idx_from_log(json_segments, results)
484
+ enriched_html, enriched_json = inject_timestamps_into_html(
485
+ html, json_segments, results, seg_to_result_idx,
486
+ str(segment_dir) if segment_dir else None,
487
+ )
488
+ animate_btn = '<button class="animate-all-btn">Animate All</button>'
489
+ return (
490
+ enriched_html,
491
+ enriched_json,
492
+ str(segment_dir) if segment_dir else None,
493
+ gr.update(visible=False, interactive=False),
494
+ gr.update(value=animate_btn, visible=True),
495
+ gr.update(visible=False),
496
+ )
497
+ except Exception as e:
498
+ print(f"[dev_tools] Timestamp injection from log failed: {e}")
499
+ import traceback
500
+ traceback.print_exc()
501
+ # Fall through to non-timestamp path
502
+
503
+ # No timestamps — build basic json_output and show Compute Timestamps button
504
+ json_output = {"segments": json_segments} if json_segments else None
505
+ has_audio = segment_dir is not None
506
+ return (
507
+ html,
508
+ json_output,
509
+ str(segment_dir) if segment_dir else None,
510
+ gr.update(visible=has_audio, interactive=has_audio),
511
+ gr.update(visible=False),
512
+ gr.update(visible=False),
513
+ )
514
 
515
 
516
  # ── Summary HTML builder ───────────────────────────────────────────────
 
592
 
593
  # ── Segment reconstruction from log ───────────────────────────────────
594
 
595
+ def _build_segments_from_log(row, audio_id):
596
+ """Build segment cards from the log's segments JSON, downloading audio on demand.
597
+
598
+ Returns (html, json_segments, segment_dir) where json_segments is a list
599
+ of dicts compatible with the MFA/timestamp pipeline.
600
+ """
601
  segments_str = row.get("segments")
602
+ _empty = ('<div style="color: #999; padding: 20px;">No segment data in this log row.</div>', [], None)
603
  if not segments_str:
604
+ return _empty
605
 
606
  try:
607
  runs = json.loads(segments_str)
608
  except (json.JSONDecodeError, TypeError):
609
+ return ('<div style="color: #999; padding: 20px;">Could not parse segments JSON.</div>', [], None)
610
 
611
  if not runs or not isinstance(runs, list):
612
+ return ('<div style="color: #999; padding: 20px;">Empty segment runs.</div>', [], None)
613
 
614
  # Use the last run (most recent alignment pass)
615
  last_run = runs[-1]
616
  seg_list = last_run.get("segments", [])
617
  if not seg_list:
618
+ return ('<div style="color: #999; padding: 20px;">No segments in last run.</div>', [], None)
619
 
620
  # Try to download audio for this specific row
621
  audio_int16 = None
 
627
  except Exception as e:
628
  print(f"[dev_tools] Audio download failed: {e}")
629
 
630
+ # Build SegmentInfo objects and json_segments in parallel
631
  from src.core.segment_types import SegmentInfo
632
  from src.alignment.special_segments import ALL_SPECIAL_REFS, SPECIAL_TEXT
633
  from src.ui.segments import render_segments, get_text_with_markers, check_undersegmented
634
 
635
  segments = []
636
+ json_segments = []
637
+ for seg_idx, seg_data in enumerate(seg_list):
638
  ref = seg_data.get("ref", "")
639
  confidence = seg_data.get("confidence", 0.0) or 0.0
640
  start = seg_data.get("start", 0.0) or 0.0
 
643
  special_type = seg_data.get("special_type", "")
644
  duration = end - start
645
 
646
+ # Parse ref into ref_from/ref_to/special_type
647
+ if ref in ALL_SPECIAL_REFS:
648
+ ref_from, ref_to, parsed_special = "", "", ref
649
+ elif "-" in ref:
650
+ ref_from, ref_to = ref.split("-", 1)
651
+ parsed_special = ""
652
+ else:
653
+ ref_from = ref_to = ref
654
+ parsed_special = ""
655
+
656
  # Reconstruct matched_text
657
  matched_text = ""
658
  if ref in ALL_SPECIAL_REFS:
 
659
  if ref in SPECIAL_TEXT:
660
  matched_text = SPECIAL_TEXT[ref]
661
  elif ref:
 
682
  )
683
  segments.append(seg_info)
684
 
685
+ json_segments.append({
686
+ "segment": seg_idx + 1,
687
+ "ref_from": ref_from,
688
+ "ref_to": ref_to,
689
+ "time_from": start,
690
+ "time_to": end,
691
+ "confidence": confidence,
692
+ "special_type": parsed_special,
693
+ "matched_text": matched_text,
694
+ "error": error,
695
+ "has_missing_words": has_missing,
696
+ })
697
+
698
  if not segments:
699
+ return ('<div style="color: #999; padding: 20px;">No valid segments to display.</div>', [], None)
700
 
701
+ html = render_segments(segments, audio_int16=audio_int16, sample_rate=sample_rate,
702
+ segment_dir=segment_dir, skip_full_audio=True)
703
+ return html, json_segments, segment_dir
704
 
705
 
706
  def _download_audio_for_row(audio_id: str):
 
713
  raise ValueError("No HF token")
714
 
715
  from datasets import load_dataset
 
716
 
717
  ds = load_dataset("hetchyy/quran-aligner-logs", token=token,
718
  split="train", streaming=True)
 
727
  audio_array = audio_data["array"]
728
  sr = audio_data["sampling_rate"]
729
 
 
 
 
 
 
730
  # Convert to int16
731
  audio_float = np.clip(audio_array, -1.0, 1.0)
732
  audio_int16 = (audio_float * 32767).astype(np.int16)
733
 
734
+ # Clean up old dev segment directories
735
+ for old_dir in SEGMENT_AUDIO_DIR.glob("dev_*"):
736
+ if old_dir.is_dir():
737
+ shutil.rmtree(old_dir, ignore_errors=True)
738
+
739
  # Create segment directory
740
  segment_dir = SEGMENT_AUDIO_DIR / f"dev_{uuid.uuid4().hex[:8]}"
741
  segment_dir.mkdir(parents=True, exist_ok=True)
 
743
  return audio_int16, sr, segment_dir
744
 
745
  raise ValueError(f"Audio ID '{audio_id}' not found in dataset")
746
+
747
+
748
+ # ── Log timestamps → MFA results conversion ──────────────────────────
749
+
750
def _log_timestamps_to_mfa_results(word_ts_json, char_ts_json):
    """Convert logged timestamp payloads to the MFA results format.

    Log word_timestamps: [{ref, words: [{word, start, end}]}]
    Log char_timestamps: [{ref, words: [{word, location, letters: [{char, start, end}]}]}]
    MFA results format:  [{status: "ok", ref, words: [{word, location, start, end, letters: [...]}]}]

    Args:
        word_ts_json: JSON string (or already-decoded list) of word-level
            timestamps; may be empty/None.
        char_ts_json: JSON string (or already-decoded list) of char-level
            timestamps; may be empty/None.

    Returns:
        A list of result dicts, one per logged ref. Built from the char log
        when it is present (word bounds merged in from the word log), else
        from the word log alone with empty ``location``/``letters``. Empty
        list when both inputs are empty.

    Raises:
        json.JSONDecodeError: If a string payload is not valid JSON.
    """
    def _decode(payload):
        """Return the decoded payload; strings are parsed, falsy values become []."""
        if not payload:
            return []
        if isinstance(payload, (str, bytes, bytearray)):
            return json.loads(payload)
        return payload

    char_ts = _decode(char_ts_json)
    word_ts = _decode(word_ts_json)

    # Build word-level lookup from word_timestamps: {ref: {word_idx: (word, start, end)}}
    word_lookup = {}
    for entry in word_ts:
        ref = entry.get("ref", "")
        for widx, w in enumerate(entry.get("words", [])):
            if w.get("start") is not None and w.get("end") is not None:
                word_lookup.setdefault(ref, {})[widx] = (w.get("word", ""), w["start"], w["end"])

    results = []

    if char_ts:
        # Primary path: use char_timestamps (has location + letters)
        for entry in char_ts:
            ref = entry.get("ref", "")
            ref_word_lookup = word_lookup.get(ref, {})
            words = []
            for widx, w in enumerate(entry.get("words", [])):
                word_text = w.get("word", "")
                letters = w.get("letters", [])

                # The two logs may have been filtered differently (words lacking
                # start/end vs. words lacking letters), so the same position can
                # hold different words. Trust the positional match only when the
                # word text agrees.
                word_start = word_end = None
                logged = ref_word_lookup.get(widx)
                if logged is not None and logged[0] == word_text:
                    _, word_start, word_end = logged

                # Otherwise infer the word bounds from its letters
                if word_start is None and letters:
                    starts = [lt["start"] for lt in letters if lt.get("start") is not None]
                    ends = [lt["end"] for lt in letters if lt.get("end") is not None]
                    if starts and ends:
                        word_start = min(starts)
                        word_end = max(ends)

                words.append({
                    "word": word_text,
                    "location": w.get("location", ""),
                    "start": word_start,
                    "end": word_end,
                    "letters": letters,
                })
            results.append({"status": "ok", "ref": ref, "words": words})
    elif word_ts:
        # Fallback: word_timestamps only (no letters, no location)
        for entry in word_ts:
            words = [
                {
                    "word": w.get("word", ""),
                    "location": "",
                    "start": w.get("start"),
                    "end": w.get("end"),
                    "letters": [],
                }
                for w in entry.get("words", [])
            ]
            results.append({"status": "ok", "ref": entry.get("ref", ""), "words": words})

    return results
809
+
810
+
811
def _build_seg_to_result_idx_from_log(json_segments, results):
    """Map zero-based segment indices to MFA result indices by matching refs.

    Args:
        json_segments: Segment dicts; each is passed to ``_build_mfa_ref`` and
            its 1-based ``"segment"`` number is read.
        results: MFA-style result dicts carrying a ``"ref"`` key.

    Returns:
        Dict of ``{segment_number - 1: result_index}`` for every segment whose
        MFA ref is found among the results.
    """
    from src.mfa import _build_mfa_ref

    # ref -> result position; results without a ref are ignored, and when
    # several results share a ref the last one wins.
    result_idx_by_ref = {
        res.get("ref", ""): pos
        for pos, res in enumerate(results)
        if res.get("ref", "")
    }

    mapping = {}
    for segment in json_segments:
        lookup_ref = _build_mfa_ref(segment)
        if lookup_ref is None:
            continue
        matched = result_idx_by_ref.get(lookup_ref)
        if matched is None:
            continue
        # "segment" is 1-based in the JSON; the mapping is keyed 0-based
        mapping[segment.get("segment", 0) - 1] = matched

    return mapping
src/ui/event_wiring.py CHANGED
@@ -505,41 +505,70 @@ def _wire_dev_tab(c):
505
  """Wire dev tab event handlers."""
506
  from src.ui.dev_tools import (
507
  load_logs_handler, filter_and_sort_handler, select_log_row_handler,
 
508
  )
509
 
510
  # Load / Refresh buttons
511
  _load_outputs = [c.dev_all_rows, c.dev_filtered_indices, c.dev_status, c.dev_table]
512
 
 
 
513
  c.dev_load_btn.click(
514
  fn=load_logs_handler,
515
  inputs=[],
516
  outputs=_load_outputs,
517
  api_name=False, show_progress="minimal",
 
 
 
 
 
518
  )
519
  c.dev_refresh_btn.click(
520
  fn=load_logs_handler,
521
  inputs=[],
522
  outputs=_load_outputs,
523
  api_name=False, show_progress="minimal",
 
 
 
 
 
524
  )
525
 
526
  # Filter / Sort changes
527
  _filter_inputs = [c.dev_all_rows, c.dev_filter_device, c.dev_filter_model,
528
- c.dev_filter_status, c.dev_sort]
529
  _filter_outputs = [c.dev_filtered_indices, c.dev_table]
530
 
531
- for component in [c.dev_filter_device, c.dev_filter_model, c.dev_filter_status, c.dev_sort]:
 
532
  component.change(
533
  fn=filter_and_sort_handler,
534
  inputs=_filter_inputs,
535
  outputs=_filter_outputs,
536
  api_name=False, show_progress="hidden",
 
 
 
 
 
537
  )
538
 
539
- # Table row selection
540
  c.dev_table.select(
541
  fn=select_log_row_handler,
542
  inputs=[c.dev_all_rows, c.dev_filtered_indices],
543
- outputs=[c.dev_detail_html],
 
544
  api_name=False, show_progress="minimal",
545
  )
 
 
 
 
 
 
 
 
 
 
505
  """Wire dev tab event handlers."""
506
  from src.ui.dev_tools import (
507
  load_logs_handler, filter_and_sort_handler, select_log_row_handler,
508
+ build_profiling_plots_handler,
509
  )
510
 
511
  # Load / Refresh buttons
512
  _load_outputs = [c.dev_all_rows, c.dev_filtered_indices, c.dev_status, c.dev_table]
513
 
514
+ _plot_outputs = [c.dev_gpu_plot, c.dev_cpu_plot]
515
+
516
  c.dev_load_btn.click(
517
  fn=load_logs_handler,
518
  inputs=[],
519
  outputs=_load_outputs,
520
  api_name=False, show_progress="minimal",
521
+ ).then(
522
+ fn=build_profiling_plots_handler,
523
+ inputs=[c.dev_all_rows, c.dev_filtered_indices],
524
+ outputs=_plot_outputs,
525
+ show_progress="hidden",
526
  )
527
  c.dev_refresh_btn.click(
528
  fn=load_logs_handler,
529
  inputs=[],
530
  outputs=_load_outputs,
531
  api_name=False, show_progress="minimal",
532
+ ).then(
533
+ fn=build_profiling_plots_handler,
534
+ inputs=[c.dev_all_rows, c.dev_filtered_indices],
535
+ outputs=_plot_outputs,
536
+ show_progress="hidden",
537
  )
538
 
539
  # Filter / Sort changes
540
  _filter_inputs = [c.dev_all_rows, c.dev_filter_device, c.dev_filter_model,
541
+ c.dev_filter_status, c.dev_sort, c.dev_days_filter]
542
  _filter_outputs = [c.dev_filtered_indices, c.dev_table]
543
 
544
+ for component in [c.dev_filter_device, c.dev_filter_model,
545
+ c.dev_filter_status, c.dev_sort, c.dev_days_filter]:
546
  component.change(
547
  fn=filter_and_sort_handler,
548
  inputs=_filter_inputs,
549
  outputs=_filter_outputs,
550
  api_name=False, show_progress="hidden",
551
+ ).then(
552
+ fn=build_profiling_plots_handler,
553
+ inputs=[c.dev_all_rows, c.dev_filtered_indices],
554
+ outputs=_plot_outputs,
555
+ show_progress="hidden",
556
  )
557
 
558
+ # Table row selection — returns 6-tuple with timestamps + controls
559
  c.dev_table.select(
560
  fn=select_log_row_handler,
561
  inputs=[c.dev_all_rows, c.dev_filtered_indices],
562
+ outputs=[c.dev_detail_html, c.dev_json_output, c.dev_segment_dir,
563
+ c.dev_compute_ts_btn, c.dev_animate_all_html, c.dev_compute_ts_progress],
564
  api_name=False, show_progress="minimal",
565
  )
566
+
567
+ # Compute Timestamps button — uses same MFA flow as main tab
568
+ c.dev_compute_ts_btn.click(
569
+ fn=compute_mfa_timestamps,
570
+ inputs=[c.dev_detail_html, c.dev_json_output, c.dev_segment_dir],
571
+ outputs=[c.dev_detail_html, c.dev_compute_ts_btn, c.dev_animate_all_html,
572
+ c.dev_compute_ts_progress, c.dev_json_output],
573
+ api_name=False, show_progress="hidden",
574
+ )
src/ui/interface.py CHANGED
@@ -42,7 +42,7 @@ def build_interface():
42
  gr.Markdown("""
43
  - Transcribe and split any recitation by pauses within 1-2 minutes
44
  - Get precise pause-, verse-, word- and character-level timestamps, exportable as JSON
45
- - GPU-powered API usage with daily quotas, and unlimited CPU usage
46
  - Reliable confidence system to flag uncertain segments and missed words — no silent errors
47
  - Robust tolerance to noise, speaker variation and suboptimal audio quality, particularly with the large model
48
  - Not intended for incorrect or fragmented recitations; most suited for correct, continuous recitations (repetitions handled)
@@ -54,9 +54,18 @@ def build_interface():
54
  with gr.Accordion("\U0001f4e1 API Usage", open=False):
55
  gr.Markdown(_api_doc)
56
 
57
- with gr.Row(elem_id="main-row"):
58
- _build_left_column(c)
59
- _build_right_column(c)
 
 
 
 
 
 
 
 
 
60
 
61
  # State components for caching VAD data between runs
62
  c.cached_speech_intervals = gr.State(value=None)
@@ -206,16 +215,9 @@ def _build_animation_settings(c):
206
 
207
 
208
  def _build_right_column(c):
209
- """Build the right output column, with optional Dev tab."""
210
  with gr.Column(scale=RIGHT_COLUMN_SCALE):
211
- if DEV_TAB_VISIBLE:
212
- with gr.Tabs():
213
- with gr.Tab("Results"):
214
- _build_results_content(c)
215
- with gr.Tab("Dev"):
216
- _build_dev_tab(c)
217
- else:
218
- _build_results_content(c)
219
 
220
 
221
  def _build_results_content(c):
 
42
  gr.Markdown("""
43
  - Transcribe and split any recitation by pauses within 1-2 minutes
44
  - Get precise pause-, verse-, word- and character-level timestamps, exportable as JSON
45
+ - GPU-powered [API usage](https://huggingface.co/spaces/hetchyy/Quran-multi-aligner/blob/main/docs/client_api.md) with daily quotas, and unlimited CPU usage
46
  - Reliable confidence system to flag uncertain segments and missed words — no silent errors
47
  - Robust tolerance to noise, speaker variation and suboptimal audio quality, particularly with the large model
48
  - Not intended for incorrect or fragmented recitations; most suited for correct, continuous recitations (repetitions handled)
 
54
  with gr.Accordion("\U0001f4e1 API Usage", open=False):
55
  gr.Markdown(_api_doc)
56
 
57
+ if DEV_TAB_VISIBLE:
58
+ with gr.Tabs():
59
+ with gr.Tab("Results"):
60
+ with gr.Row(elem_id="main-row"):
61
+ _build_left_column(c)
62
+ _build_right_column(c)
63
+ with gr.Tab("Dev"):
64
+ _build_dev_tab(c)
65
+ else:
66
+ with gr.Row(elem_id="main-row"):
67
+ _build_left_column(c)
68
+ _build_right_column(c)
69
 
70
  # State components for caching VAD data between runs
71
  c.cached_speech_intervals = gr.State(value=None)
 
215
 
216
 
217
  def _build_right_column(c):
218
+ """Build the right output column."""
219
  with gr.Column(scale=RIGHT_COLUMN_SCALE):
220
+ _build_results_content(c)
 
 
 
 
 
 
 
221
 
222
 
223
  def _build_results_content(c):
src/ui/segments.py CHANGED
@@ -373,7 +373,7 @@ def render_segment_card(seg: SegmentInfo, idx: int, audio_int16: np.ndarray = No
373
  return html
374
 
375
 
376
- def render_segments(segments: list, audio_int16: np.ndarray = None, sample_rate: int = 0, segment_dir: Path = None) -> str:
377
  """Render all segments as HTML with optional audio players.
378
 
379
  Args:
@@ -390,7 +390,7 @@ def render_segments(segments: list, audio_int16: np.ndarray = None, sample_rate:
390
 
391
  # Write full audio file for unified megacard playback
392
  full_audio_url = ""
393
- if audio_int16 is not None and sample_rate > 0 and segment_dir:
394
  full_path = segment_dir / "full.wav"
395
  with wave.open(str(full_path), 'wb') as wf:
396
  wf.setnchannels(1)
 
373
  return html
374
 
375
 
376
+ def render_segments(segments: list, audio_int16: np.ndarray = None, sample_rate: int = 0, segment_dir: Path = None, skip_full_audio: bool = False) -> str:
377
  """Render all segments as HTML with optional audio players.
378
 
379
  Args:
 
390
 
391
  # Write full audio file for unified megacard playback
392
  full_audio_url = ""
393
+ if audio_int16 is not None and sample_rate > 0 and segment_dir and not skip_full_audio:
394
  full_path = segment_dir / "full.wav"
395
  with wave.open(str(full_path), 'wb') as wf:
396
  wf.setnchannels(1)