Quran-multi-aligner

Running on Zero

App Files Files Community

hetchyy committed on Feb 23

Commit

5df5324

1 Parent(s): c431bc9

Update dev tools

Browse files

Files changed (5) hide show

src/mfa.py +74 -58
src/ui/dev_tools.py +348 -26
src/ui/event_wiring.py +33 -4
src/ui/interface.py +15 -13
src/ui/segments.py +2 -2

src/mfa.py CHANGED Viewed

@@ -683,16 +683,81 @@ def compute_mfa_timestamps(current_html, json_output, segment_dir, cached_log_ro
         )
         raise
-    # Build timestamp lookups using shared helper
-    word_timestamps, letter_timestamps, word_to_all_results = _build_timestamp_lookups(results)
-    # Build cross-word groups using shared helper
-    crossword_groups = _build_crossword_groups(results, letter_timestamps)
-    # Extend word timestamps using shared helper
-    _extend_word_timestamps(word_timestamps, segments, seg_to_result_idx, results, segment_dir)
-    # --- HTML injection (UI-only, not shared with API) ---
     # Inject timestamps into word spans, using segment boundaries to determine result_idx
     seg_boundaries = []
@@ -754,8 +819,6 @@ def compute_mfa_timestamps(current_html, json_output, segment_dir, cached_log_ro
     html = re.sub(r'(<button class="animate-btn"[^>]*?)\s+disabled(?:="[^"]*")?', r'\1', html)
     # Stamp char spans with MFA letter timestamps
-    import unicodedata
     def _stamp_chars_with_mfa(word_m):
         word_open = word_m.group(1)
         word_abs_start = float(word_m.group(2))
@@ -873,57 +936,10 @@ def compute_mfa_timestamps(current_html, json_output, segment_dir, cached_log_ro
     print(f"[MFA_TS] Done — injected timestamps for {len(word_timestamps)} words")
-    # Log word and char timestamps to usage logger
-    if cached_log_row is not None:
-        try:
-            import json as _json
-            from src.core.usage_logger import update_word_timestamps
-            _ts_log = []
-            _char_ts_log = []
-            for result in results:
-                if result.get("status") != "ok":
-                    continue
-                _ts_log.append({
-                    "ref": result.get("ref", ""),
-                    "words": [
-                        {"word": w.get("word", ""), "start": round(w["start"], 4), "end": round(w["end"], 4)}
-                        for w in result.get("words", []) if w.get("start") is not None and w.get("end") is not None
-                    ],
-                })
-                _char_ts_log.append({
-                    "ref": result.get("ref", ""),
-                    "words": [
-                        {
-                            "word": w.get("word", ""),
-                            "location": w.get("location", ""),
-                            "letters": [
-                                {"char": lt.get("char", ""), "start": round(lt["start"], 4), "end": round(lt["end"], 4)}
-                                for lt in w.get("letters", []) if lt.get("start") is not None and lt.get("end") is not None
-                            ],
-                        }
-                        for w in result.get("words", []) if w.get("letters")
-                    ],
-                })
-            update_word_timestamps(
-                cached_log_row,
-                _json.dumps(_ts_log),
-                _json.dumps(_char_ts_log) if any(entry["words"] for entry in _char_ts_log) else None,
-            )
-        except Exception as e:
-            print(f"[USAGE_LOG] Failed to log word timestamps: {e}")
-    # Build enriched JSON using shared helper (UI always includes letters)
     enriched_json = _build_enriched_json(
         segments, results, seg_to_result_idx,
         word_timestamps, letter_timestamps, "words+chars",
     )
-    # Final yield: updated HTML, hide progress bar, show Animate All, enriched JSON
-    animate_all_btn_html = '<button class="animate-all-btn">Animate All</button>'
-    yield (
-        html,
-        gr.update(visible=False),
-        gr.update(value=animate_all_btn_html, visible=True),
-        gr.update(visible=False),
-        enriched_json,
-    )

         )
         raise
+    html, enriched_json = inject_timestamps_into_html(
+        current_html, segments, results, seg_to_result_idx, segment_dir
+    )
+    # Log word and char timestamps to usage logger
+    if cached_log_row is not None:
+        try:
+            import json as _json
+            from src.core.usage_logger import update_word_timestamps
+            _ts_log = []
+            _char_ts_log = []
+            for result in results:
+                if result.get("status") != "ok":
+                    continue
+                _ts_log.append({
+                    "ref": result.get("ref", ""),
+                    "words": [
+                        {"word": w.get("word", ""), "start": round(w["start"], 4), "end": round(w["end"], 4)}
+                        for w in result.get("words", []) if w.get("start") is not None and w.get("end") is not None
+                    ],
+                })
+                _char_ts_log.append({
+                    "ref": result.get("ref", ""),
+                    "words": [
+                        {
+                            "word": w.get("word", ""),
+                            "location": w.get("location", ""),
+                            "letters": [
+                                {"char": lt.get("char", ""), "start": round(lt["start"], 4), "end": round(lt["end"], 4)}
+                                for lt in w.get("letters", []) if lt.get("start") is not None and lt.get("end") is not None
+                            ],
+                        }
+                        for w in result.get("words", []) if w.get("letters")
+                    ],
+                })
+            update_word_timestamps(
+                cached_log_row,
+                _json.dumps(_ts_log),
+                _json.dumps(_char_ts_log) if any(entry["words"] for entry in _char_ts_log) else None,
+            )
+        except Exception as e:
+            print(f"[USAGE_LOG] Failed to log word timestamps: {e}")
+    # Final yield: updated HTML, hide progress bar, show Animate All, enriched JSON
+    animate_all_btn_html = '<button class="animate-all-btn">Animate All</button>'
+    yield (
+        html,
+        gr.update(visible=False),
+        gr.update(value=animate_all_btn_html, visible=True),
+        gr.update(visible=False),
+        enriched_json,
+    )
+# ---------------------------------------------------------------------------
+# Reusable HTML timestamp injection (shared by UI generator and Dev tab)
+# ---------------------------------------------------------------------------
+def inject_timestamps_into_html(current_html, segments, results, seg_to_result_idx, segment_dir):
+    """Inject word and char timestamps into rendered segment HTML.
+    Builds lookups, cross-word groups, extends timestamps, then performs
+    regex-based injection of data-start/data-end attributes into word and
+    char spans. Reusable by both the main MFA flow and the Dev tab
+    log-based flow.
+    Returns (enriched_html, enriched_json).
+    """
+    import re
+    import unicodedata
+    # Build timestamp lookups
+    word_timestamps, letter_timestamps, word_to_all_results = _build_timestamp_lookups(results)
+    crossword_groups = _build_crossword_groups(results, letter_timestamps)
+    _extend_word_timestamps(word_timestamps, segments, seg_to_result_idx, results, segment_dir)
     # Inject timestamps into word spans, using segment boundaries to determine result_idx
     seg_boundaries = []
     html = re.sub(r'(<button class="animate-btn"[^>]*?)\s+disabled(?:="[^"]*")?', r'\1', html)
     # Stamp char spans with MFA letter timestamps
     def _stamp_chars_with_mfa(word_m):
         word_open = word_m.group(1)
         word_abs_start = float(word_m.group(2))
     print(f"[MFA_TS] Done — injected timestamps for {len(word_timestamps)} words")
+    # Build enriched JSON (UI always includes letters)
     enriched_json = _build_enriched_json(
         segments, results, seg_to_result_idx,
         word_timestamps, letter_timestamps, "words+chars",
     )
+    return html, enriched_json

src/ui/dev_tools.py CHANGED Viewed

@@ -2,6 +2,7 @@
 import json
 import os
 import uuid
 from datetime import datetime, timezone
 from pathlib import Path
@@ -103,6 +104,9 @@ def build_dev_tab_ui(c):
         c.dev_sort = gr.Dropdown(
             choices=["Newest", "Duration", "Failures"], value="Newest", label="Sort", scale=1,
         )
     c.dev_table = gr.Dataframe(
         headers=["#", "Time", "Surah", "Duration", "Segs", "Model", "Device",
@@ -114,11 +118,23 @@ def build_dev_tab_ui(c):
         wrap=True,
     )
     c.dev_detail_html = gr.HTML(value="", label="Log Detail")
     # State
     c.dev_all_rows = gr.State(value=[])
     c.dev_filtered_indices = gr.State(value=[])
 # ── Row extraction ─────────────────────────────────────────────────────
@@ -151,6 +167,8 @@ def _row_to_dict(row) -> dict:
         "min_speech_ms": row.get("min_speech_ms"),
         "pad_ms": row.get("pad_ms"),
         "segments": row.get("segments"),
         "resegmented": row.get("resegmented"),
         "retranscribed": row.get("retranscribed"),
         "error": row.get("error"),
@@ -236,7 +254,7 @@ def load_logs_handler():
     return rows, indices, status, table_data
-def filter_and_sort_handler(all_rows, device, model, status_filter, sort_by):
     """Filter and sort cached rows, return new table + index mapping."""
     if not all_rows:
         return [], gr.update()
@@ -244,7 +262,25 @@ def filter_and_sort_handler(all_rows, device, model, status_filter, sort_by):
     surah_names = _load_surah_names()
     indices = []
     for i, row in enumerate(all_rows):
         # Device filter
         if device != "All":
             row_device = (row.get("device") or "").lower()
@@ -282,14 +318,144 @@ def filter_and_sort_handler(all_rows, device, model, status_filter, sort_by):
     return indices, table_data
 def select_log_row_handler(all_rows, filtered_indices, evt: gr.SelectData):
-    """When a table row is clicked, download audio and render segments."""
     if not all_rows or not filtered_indices:
-        return ""
     display_idx = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
     if display_idx < 0 or display_idx >= len(filtered_indices):
-        return ""
     row_idx = filtered_indices[display_idx]
     row = all_rows[row_idx]
@@ -300,10 +466,51 @@ def select_log_row_handler(all_rows, filtered_indices, evt: gr.SelectData):
     # Build summary HTML
     summary_html = _build_summary_html(row, surah_names)
-    # Try to reconstruct and render segments
-    segments_html = _build_segments_from_log(row, audio_id)
-    return summary_html + segments_html
 # ── Summary HTML builder ───────────────────────────────────────────────
@@ -385,25 +592,30 @@ def _build_summary_html(row, surah_names) -> str:
 # ── Segment reconstruction from log ───────────────────────────────────
-def _build_segments_from_log(row, audio_id) -> str:
-    """Build segment cards from the log's segments JSON, downloading audio on demand."""
     segments_str = row.get("segments")
     if not segments_str:
-        return '<div style="color: #999; padding: 20px;">No segment data in this log row.</div>'
     try:
         runs = json.loads(segments_str)
     except (json.JSONDecodeError, TypeError):
-        return '<div style="color: #999; padding: 20px;">Could not parse segments JSON.</div>'
     if not runs or not isinstance(runs, list):
-        return '<div style="color: #999; padding: 20px;">Empty segment runs.</div>'
     # Use the last run (most recent alignment pass)
     last_run = runs[-1]
     seg_list = last_run.get("segments", [])
     if not seg_list:
-        return '<div style="color: #999; padding: 20px;">No segments in last run.</div>'
     # Try to download audio for this specific row
     audio_int16 = None
@@ -415,13 +627,14 @@ def _build_segments_from_log(row, audio_id) -> str:
     except Exception as e:
         print(f"[dev_tools] Audio download failed: {e}")
-    # Build SegmentInfo objects and render
     from src.core.segment_types import SegmentInfo
     from src.alignment.special_segments import ALL_SPECIAL_REFS, SPECIAL_TEXT
     from src.ui.segments import render_segments, get_text_with_markers, check_undersegmented
     segments = []
-    for seg_data in seg_list:
         ref = seg_data.get("ref", "")
         confidence = seg_data.get("confidence", 0.0) or 0.0
         start = seg_data.get("start", 0.0) or 0.0
@@ -430,10 +643,19 @@ def _build_segments_from_log(row, audio_id) -> str:
         special_type = seg_data.get("special_type", "")
         duration = end - start
         # Reconstruct matched_text
         matched_text = ""
         if ref in ALL_SPECIAL_REFS:
-            # For known specials, use the constant text
             if ref in SPECIAL_TEXT:
                 matched_text = SPECIAL_TEXT[ref]
         elif ref:
@@ -460,11 +682,25 @@ def _build_segments_from_log(row, audio_id) -> str:
         )
         segments.append(seg_info)
     if not segments:
-        return '<div style="color: #999; padding: 20px;">No valid segments to display.</div>'
-    return render_segments(segments, audio_int16=audio_int16, sample_rate=sample_rate,
-                           segment_dir=segment_dir)
 def _download_audio_for_row(audio_id: str):
@@ -477,7 +713,6 @@ def _download_audio_for_row(audio_id: str):
         raise ValueError("No HF token")
     from datasets import load_dataset
-    import librosa
     ds = load_dataset("hetchyy/quran-aligner-logs", token=token,
                       split="train", streaming=True)
@@ -492,15 +727,15 @@ def _download_audio_for_row(audio_id: str):
             audio_array = audio_data["array"]
             sr = audio_data["sampling_rate"]
-            # Resample to 16kHz if needed
-            if sr != 16000:
-                audio_array = librosa.resample(audio_array, orig_sr=sr, target_sr=16000)
-                sr = 16000
             # Convert to int16
             audio_float = np.clip(audio_array, -1.0, 1.0)
             audio_int16 = (audio_float * 32767).astype(np.int16)
             # Create segment directory
             segment_dir = SEGMENT_AUDIO_DIR / f"dev_{uuid.uuid4().hex[:8]}"
             segment_dir.mkdir(parents=True, exist_ok=True)
@@ -508,3 +743,90 @@ def _download_audio_for_row(audio_id: str):
             return audio_int16, sr, segment_dir
     raise ValueError(f"Audio ID '{audio_id}' not found in dataset")

 import json
 import os
+import shutil
 import uuid
 from datetime import datetime, timezone
 from pathlib import Path
         c.dev_sort = gr.Dropdown(
             choices=["Newest", "Duration", "Failures"], value="Newest", label="Sort", scale=1,
         )
+        c.dev_days_filter = gr.Number(
+            label="Last N Days", value=None, precision=0, minimum=1, scale=1,
+        )
     c.dev_table = gr.Dataframe(
         headers=["#", "Time", "Surah", "Duration", "Segs", "Model", "Device",
         wrap=True,
     )
+    with gr.Row():
+        c.dev_gpu_plot = gr.Plot(label="GPU: Audio Duration vs Processing Time", visible=False)
+        c.dev_cpu_plot = gr.Plot(label="CPU: Audio Duration vs Processing Time", visible=False)
     c.dev_detail_html = gr.HTML(value="", label="Log Detail")
+    with gr.Row():
+        c.dev_compute_ts_btn = gr.Button("Compute Timestamps", variant="secondary",
+                                          interactive=False, visible=False)
+        c.dev_compute_ts_progress = gr.HTML(value="", visible=False)
+        c.dev_animate_all_html = gr.HTML(value="", visible=False)
     # State
     c.dev_all_rows = gr.State(value=[])
     c.dev_filtered_indices = gr.State(value=[])
+    c.dev_segment_dir = gr.State(value=None)
+    c.dev_json_output = gr.State(value=None)
 # ── Row extraction ─────────────────────────────────────────────────────
         "min_speech_ms": row.get("min_speech_ms"),
         "pad_ms": row.get("pad_ms"),
         "segments": row.get("segments"),
+        "word_timestamps": row.get("word_timestamps"),
+        "char_timestamps": row.get("char_timestamps"),
         "resegmented": row.get("resegmented"),
         "retranscribed": row.get("retranscribed"),
         "error": row.get("error"),
     return rows, indices, status, table_data
+def filter_and_sort_handler(all_rows, device, model, status_filter, sort_by, days=None):
     """Filter and sort cached rows, return new table + index mapping."""
     if not all_rows:
         return [], gr.update()
     surah_names = _load_surah_names()
     indices = []
+    # Compute cutoff for days filter
+    cutoff = None
+    if days is not None and days > 0:
+        from datetime import timedelta
+        cutoff = datetime.now(timezone.utc) - timedelta(days=int(days))
     for i, row in enumerate(all_rows):
+        # Days filter
+        if cutoff is not None:
+            ts = row.get("timestamp", "")
+            try:
+                row_dt = datetime.fromisoformat(ts)
+                if row_dt.tzinfo is None:
+                    row_dt = row_dt.replace(tzinfo=timezone.utc)
+                if row_dt < cutoff:
+                    continue
+            except (ValueError, TypeError):
+                continue
         # Device filter
         if device != "All":
             row_device = (row.get("device") or "").lower()
     return indices, table_data
+def build_profiling_plots_handler(all_rows, filtered_indices):
+    """Build GPU and CPU linear regression scatter plots from filtered data."""
+    if not all_rows or not filtered_indices:
+        return gr.update(visible=False), gr.update(visible=False)
+    import matplotlib
+    matplotlib.use("Agg")
+    import matplotlib.pyplot as plt
+    # Collect data points from filtered rows
+    gpu_rows = []  # (audio_dur, vad_gpu, asr_gpu, asr_model)
+    cpu_rows = []
+    for i in filtered_indices:
+        row = all_rows[i]
+        audio_dur = row.get("audio_duration_s")
+        vad_gpu = row.get("vad_gpu_time")
+        asr_gpu = row.get("asr_gpu_time")
+        device = (row.get("device") or "").lower()
+        asr_model = row.get("asr_model", "")
+        if audio_dur is None or audio_dur <= 0:
+            continue
+        entry = (audio_dur, vad_gpu, asr_gpu, asr_model)
+        if device in ("cuda", "gpu"):
+            gpu_rows.append(entry)
+        elif device == "cpu":
+            cpu_rows.append(entry)
+    def _build_figure(rows, title):
+        """Build a dual y-axis scatter + regression figure for one device type."""
+        if not rows:
+            return None
+        # Split series
+        vad_x, vad_y = [], []
+        asr_base_x, asr_base_y = [], []
+        asr_large_x, asr_large_y = [], []
+        for audio_dur, vad_t, asr_t, model in rows:
+            if vad_t is not None and vad_t > 0:
+                vad_x.append(audio_dur)
+                vad_y.append(vad_t)
+            if asr_t is not None and asr_t > 0:
+                if model == "Base":
+                    asr_base_x.append(audio_dur)
+                    asr_base_y.append(asr_t)
+                elif model == "Large":
+                    asr_large_x.append(audio_dur)
+                    asr_large_y.append(asr_t)
+        if not vad_x and not asr_base_x and not asr_large_x:
+            return None
+        fig, ax_vad = plt.subplots(figsize=(7, 4.5))
+        ax_asr = ax_vad.twinx()
+        handles, labels = [], []
+        # VAD series (left y-axis, blue)
+        if vad_x:
+            s = ax_vad.scatter(vad_x, vad_y, color="#4a9eff", alpha=0.5, s=20, zorder=3)
+            handles.append(s)
+            if len(vad_x) >= 2:
+                coeffs = np.polyfit(vad_x, vad_y, 1)
+                x_line = np.array([min(vad_x), max(vad_x)])
+                y_line = np.polyval(coeffs, x_line)
+                line, = ax_vad.plot(x_line, y_line, color="#4a9eff", linewidth=1.5, zorder=4)
+                labels.append(f"VAD: y={coeffs[0]:.3f}x+{coeffs[1]:.2f}")
+            else:
+                labels.append("VAD")
+        # ASR Base series (right y-axis, orange)
+        if asr_base_x:
+            s = ax_asr.scatter(asr_base_x, asr_base_y, color="#f0ad4e", alpha=0.5, s=20, marker="^", zorder=3)
+            handles.append(s)
+            if len(asr_base_x) >= 2:
+                coeffs = np.polyfit(asr_base_x, asr_base_y, 1)
+                x_line = np.array([min(asr_base_x), max(asr_base_x)])
+                y_line = np.polyval(coeffs, x_line)
+                ax_asr.plot(x_line, y_line, color="#f0ad4e", linewidth=1.5, zorder=4)
+                labels.append(f"ASR Base: y={coeffs[0]:.3f}x+{coeffs[1]:.2f}")
+            else:
+                labels.append("ASR Base")
+        # ASR Large series (right y-axis, red)
+        if asr_large_x:
+            s = ax_asr.scatter(asr_large_x, asr_large_y, color="#d9534f", alpha=0.5, s=20, marker="s", zorder=3)
+            handles.append(s)
+            if len(asr_large_x) >= 2:
+                coeffs = np.polyfit(asr_large_x, asr_large_y, 1)
+                x_line = np.array([min(asr_large_x), max(asr_large_x)])
+                y_line = np.polyval(coeffs, x_line)
+                ax_asr.plot(x_line, y_line, color="#d9534f", linewidth=1.5, zorder=4)
+                labels.append(f"ASR Large: y={coeffs[0]:.3f}x+{coeffs[1]:.2f}")
+            else:
+                labels.append("ASR Large")
+        ax_vad.set_xlabel("Audio Duration (s)")
+        ax_vad.set_ylabel("VAD Time (s)", color="#4a9eff")
+        ax_asr.set_ylabel("ASR Time (s)", color="#f0ad4e")
+        ax_vad.tick_params(axis="y", labelcolor="#4a9eff")
+        ax_asr.tick_params(axis="y", labelcolor="#f0ad4e")
+        ax_vad.set_title(title)
+        if handles:
+            fig.legend(handles, labels, loc="upper left", bbox_to_anchor=(0.12, 0.88),
+                       fontsize=8, framealpha=0.8)
+        fig.tight_layout()
+        return fig
+    gpu_fig = _build_figure(gpu_rows, "GPU: Audio Duration vs Processing Time")
+    cpu_fig = _build_figure(cpu_rows, "CPU: Audio Duration vs Processing Time")
+    gpu_update = gr.update(value=gpu_fig, visible=True) if gpu_fig else gr.update(visible=False)
+    cpu_update = gr.update(value=cpu_fig, visible=True) if cpu_fig else gr.update(visible=False)
+    # Close figures to free memory
+    plt.close("all")
+    return gpu_update, cpu_update
 def select_log_row_handler(all_rows, filtered_indices, evt: gr.SelectData):
+    """When a table row is clicked, download audio, render segments, inject timestamps if available.
+    Returns 6-tuple: (dev_detail_html, dev_json_output, dev_segment_dir,
+                       dev_compute_ts_btn, dev_animate_all_html, dev_compute_ts_progress)
+    """
+    _empty = ("", None, None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False))
     if not all_rows or not filtered_indices:
+        return _empty
     display_idx = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
     if display_idx < 0 or display_idx >= len(filtered_indices):
+        return _empty
     row_idx = filtered_indices[display_idx]
     row = all_rows[row_idx]
     # Build summary HTML
     summary_html = _build_summary_html(row, surah_names)
+    # Reconstruct and render segments
+    html, json_segments, segment_dir = _build_segments_from_log(row, audio_id)
+    html = summary_html + html
+    # Check if timestamps exist in the log
+    has_ts = bool(row.get("word_timestamps"))
+    if has_ts and json_segments:
+        try:
+            from src.mfa import inject_timestamps_into_html
+            results = _log_timestamps_to_mfa_results(
+                row.get("word_timestamps"), row.get("char_timestamps")
+            )
+            seg_to_result_idx = _build_seg_to_result_idx_from_log(json_segments, results)
+            enriched_html, enriched_json = inject_timestamps_into_html(
+                html, json_segments, results, seg_to_result_idx,
+                str(segment_dir) if segment_dir else None,
+            )
+            animate_btn = '<button class="animate-all-btn">Animate All</button>'
+            return (
+                enriched_html,
+                enriched_json,
+                str(segment_dir) if segment_dir else None,
+                gr.update(visible=False, interactive=False),
+                gr.update(value=animate_btn, visible=True),
+                gr.update(visible=False),
+            )
+        except Exception as e:
+            print(f"[dev_tools] Timestamp injection from log failed: {e}")
+            import traceback
+            traceback.print_exc()
+            # Fall through to non-timestamp path
+    # No timestamps — build basic json_output and show Compute Timestamps button
+    json_output = {"segments": json_segments} if json_segments else None
+    has_audio = segment_dir is not None
+    return (
+        html,
+        json_output,
+        str(segment_dir) if segment_dir else None,
+        gr.update(visible=has_audio, interactive=has_audio),
+        gr.update(visible=False),
+        gr.update(visible=False),
+    )
 # ── Summary HTML builder ───────────────────────────────────────────────
 # ── Segment reconstruction from log ───────────────────────────────────
+def _build_segments_from_log(row, audio_id):
+    """Build segment cards from the log's segments JSON, downloading audio on demand.
+    Returns (html, json_segments, segment_dir) where json_segments is a list
+    of dicts compatible with the MFA/timestamp pipeline.
+    """
     segments_str = row.get("segments")
+    _empty = ('<div style="color: #999; padding: 20px;">No segment data in this log row.</div>', [], None)
     if not segments_str:
+        return _empty
     try:
         runs = json.loads(segments_str)
     except (json.JSONDecodeError, TypeError):
+        return ('<div style="color: #999; padding: 20px;">Could not parse segments JSON.</div>', [], None)
     if not runs or not isinstance(runs, list):
+        return ('<div style="color: #999; padding: 20px;">Empty segment runs.</div>', [], None)
     # Use the last run (most recent alignment pass)
     last_run = runs[-1]
     seg_list = last_run.get("segments", [])
     if not seg_list:
+        return ('<div style="color: #999; padding: 20px;">No segments in last run.</div>', [], None)
     # Try to download audio for this specific row
     audio_int16 = None
     except Exception as e:
         print(f"[dev_tools] Audio download failed: {e}")
+    # Build SegmentInfo objects and json_segments in parallel
     from src.core.segment_types import SegmentInfo
     from src.alignment.special_segments import ALL_SPECIAL_REFS, SPECIAL_TEXT
     from src.ui.segments import render_segments, get_text_with_markers, check_undersegmented
     segments = []
+    json_segments = []
+    for seg_idx, seg_data in enumerate(seg_list):
         ref = seg_data.get("ref", "")
         confidence = seg_data.get("confidence", 0.0) or 0.0
         start = seg_data.get("start", 0.0) or 0.0
         special_type = seg_data.get("special_type", "")
         duration = end - start
+        # Parse ref into ref_from/ref_to/special_type
+        if ref in ALL_SPECIAL_REFS:
+            ref_from, ref_to, parsed_special = "", "", ref
+        elif "-" in ref:
+            ref_from, ref_to = ref.split("-", 1)
+            parsed_special = ""
+        else:
+            ref_from = ref_to = ref
+            parsed_special = ""
         # Reconstruct matched_text
         matched_text = ""
         if ref in ALL_SPECIAL_REFS:
             if ref in SPECIAL_TEXT:
                 matched_text = SPECIAL_TEXT[ref]
         elif ref:
         )
         segments.append(seg_info)
+        json_segments.append({
+            "segment": seg_idx + 1,
+            "ref_from": ref_from,
+            "ref_to": ref_to,
+            "time_from": start,
+            "time_to": end,
+            "confidence": confidence,
+            "special_type": parsed_special,
+            "matched_text": matched_text,
+            "error": error,
+            "has_missing_words": has_missing,
+        })
     if not segments:
+        return ('<div style="color: #999; padding: 20px;">No valid segments to display.</div>', [], None)
+    html = render_segments(segments, audio_int16=audio_int16, sample_rate=sample_rate,
+                           segment_dir=segment_dir, skip_full_audio=True)
+    return html, json_segments, segment_dir
 def _download_audio_for_row(audio_id: str):
         raise ValueError("No HF token")
     from datasets import load_dataset
     ds = load_dataset("hetchyy/quran-aligner-logs", token=token,
                       split="train", streaming=True)
             audio_array = audio_data["array"]
             sr = audio_data["sampling_rate"]
             # Convert to int16
             audio_float = np.clip(audio_array, -1.0, 1.0)
             audio_int16 = (audio_float * 32767).astype(np.int16)
+            # Clean up old dev segment directories
+            for old_dir in SEGMENT_AUDIO_DIR.glob("dev_*"):
+                if old_dir.is_dir():
+                    shutil.rmtree(old_dir, ignore_errors=True)
             # Create segment directory
             segment_dir = SEGMENT_AUDIO_DIR / f"dev_{uuid.uuid4().hex[:8]}"
             segment_dir.mkdir(parents=True, exist_ok=True)
             return audio_int16, sr, segment_dir
     raise ValueError(f"Audio ID '{audio_id}' not found in dataset")
+# ── Log timestamps → MFA results conversion ──────────────────────────
+def _log_timestamps_to_mfa_results(word_ts_json, char_ts_json):
+    """Convert logged timestamp format to MFA results format.
+    Log char_timestamps: [{ref, words: [{word, location, letters: [{char, start, end}]}]}]
+    MFA results format:  [{status: "ok", ref, words: [{word, location, start, end, letters: [...]}]}]
+    """
+    char_ts = json.loads(char_ts_json) if char_ts_json else []
+    word_ts = json.loads(word_ts_json) if word_ts_json else []
+    # Build word-level start/end lookup from word_timestamps
+    word_lookup = {}  # {ref: {word_idx: (start, end)}}
+    for entry in word_ts:
+        ref = entry.get("ref", "")
+        for widx, w in enumerate(entry.get("words", [])):
+            if w.get("start") is not None and w.get("end") is not None:
+                word_lookup.setdefault(ref, {})[widx] = (w["start"], w["end"])
+    results = []
+    if char_ts:
+        # Primary path: use char_timestamps (has location + letters)
+        for entry in char_ts:
+            ref = entry.get("ref", "")
+            ref_word_lookup = word_lookup.get(ref, {})
+            words = []
+            for widx, w in enumerate(entry.get("words", [])):
+                word_start, word_end = ref_word_lookup.get(widx, (None, None))
+                letters = w.get("letters", [])
+                # Infer word start/end from letters if not in word_timestamps
+                if word_start is None and letters:
+                    starts = [lt["start"] for lt in letters if lt.get("start") is not None]
+                    ends = [lt["end"] for lt in letters if lt.get("end") is not None]
+                    if starts and ends:
+                        word_start = min(starts)
+                        word_end = max(ends)
+                words.append({
+                    "word": w.get("word", ""),
+                    "location": w.get("location", ""),
+                    "start": word_start,
+                    "end": word_end,
+                    "letters": letters,
+                })
+            results.append({"status": "ok", "ref": ref, "words": words})
+    elif word_ts:
+        # Fallback: word_timestamps only (no letters)
+        for entry in word_ts:
+            ref = entry.get("ref", "")
+            words = []
+            for w in entry.get("words", []):
+                words.append({
+                    "word": w.get("word", ""),
+                    "location": "",
+                    "start": w.get("start"),
+                    "end": w.get("end"),
+                    "letters": [],
+                })
+            results.append({"status": "ok", "ref": ref, "words": words})
+    return results
+def _build_seg_to_result_idx_from_log(json_segments, results):
+    """Map segment indices to MFA result indices by matching refs."""
+    from src.mfa import _build_mfa_ref
+    # Build ref → result index lookup
+    ref_to_result = {}
+    for i, r in enumerate(results):
+        ref = r.get("ref", "")
+        if ref:
+            ref_to_result[ref] = i
+    seg_to_result_idx = {}
+    for seg in json_segments:
+        mfa_ref = _build_mfa_ref(seg)
+        if mfa_ref is None:
+            continue
+        seg_idx = seg.get("segment", 0) - 1
+        result_idx = ref_to_result.get(mfa_ref)
+        if result_idx is not None:
+            seg_to_result_idx[seg_idx] = result_idx
+    return seg_to_result_idx

src/ui/event_wiring.py CHANGED Viewed

@@ -505,41 +505,70 @@ def _wire_dev_tab(c):
     """Wire dev tab event handlers."""
     from src.ui.dev_tools import (
         load_logs_handler, filter_and_sort_handler, select_log_row_handler,
     )
     # Load / Refresh buttons
     _load_outputs = [c.dev_all_rows, c.dev_filtered_indices, c.dev_status, c.dev_table]
     c.dev_load_btn.click(
         fn=load_logs_handler,
         inputs=[],
         outputs=_load_outputs,
         api_name=False, show_progress="minimal",
     )
     c.dev_refresh_btn.click(
         fn=load_logs_handler,
         inputs=[],
         outputs=_load_outputs,
         api_name=False, show_progress="minimal",
     )
     # Filter / Sort changes
     _filter_inputs = [c.dev_all_rows, c.dev_filter_device, c.dev_filter_model,
-                      c.dev_filter_status, c.dev_sort]
     _filter_outputs = [c.dev_filtered_indices, c.dev_table]
-    for component in [c.dev_filter_device, c.dev_filter_model, c.dev_filter_status, c.dev_sort]:
         component.change(
             fn=filter_and_sort_handler,
             inputs=_filter_inputs,
             outputs=_filter_outputs,
             api_name=False, show_progress="hidden",
         )
-    # Table row selection
     c.dev_table.select(
         fn=select_log_row_handler,
         inputs=[c.dev_all_rows, c.dev_filtered_indices],
-        outputs=[c.dev_detail_html],
         api_name=False, show_progress="minimal",
     )

     """Wire dev tab event handlers."""
     from src.ui.dev_tools import (
         load_logs_handler, filter_and_sort_handler, select_log_row_handler,
+        build_profiling_plots_handler,
     )
     # Load / Refresh buttons
     _load_outputs = [c.dev_all_rows, c.dev_filtered_indices, c.dev_status, c.dev_table]
+    _plot_outputs = [c.dev_gpu_plot, c.dev_cpu_plot]
     c.dev_load_btn.click(
         fn=load_logs_handler,
         inputs=[],
         outputs=_load_outputs,
         api_name=False, show_progress="minimal",
+    ).then(
+        fn=build_profiling_plots_handler,
+        inputs=[c.dev_all_rows, c.dev_filtered_indices],
+        outputs=_plot_outputs,
+        show_progress="hidden",
     )
     c.dev_refresh_btn.click(
         fn=load_logs_handler,
         inputs=[],
         outputs=_load_outputs,
         api_name=False, show_progress="minimal",
+    ).then(
+        fn=build_profiling_plots_handler,
+        inputs=[c.dev_all_rows, c.dev_filtered_indices],
+        outputs=_plot_outputs,
+        show_progress="hidden",
     )
     # Filter / Sort changes
     _filter_inputs = [c.dev_all_rows, c.dev_filter_device, c.dev_filter_model,
+                      c.dev_filter_status, c.dev_sort, c.dev_days_filter]
     _filter_outputs = [c.dev_filtered_indices, c.dev_table]
+    for component in [c.dev_filter_device, c.dev_filter_model,
+                      c.dev_filter_status, c.dev_sort, c.dev_days_filter]:
         component.change(
             fn=filter_and_sort_handler,
             inputs=_filter_inputs,
             outputs=_filter_outputs,
             api_name=False, show_progress="hidden",
+        ).then(
+            fn=build_profiling_plots_handler,
+            inputs=[c.dev_all_rows, c.dev_filtered_indices],
+            outputs=_plot_outputs,
+            show_progress="hidden",
         )
+    # Table row selection — returns 6-tuple with timestamps + controls
     c.dev_table.select(
         fn=select_log_row_handler,
         inputs=[c.dev_all_rows, c.dev_filtered_indices],
+        outputs=[c.dev_detail_html, c.dev_json_output, c.dev_segment_dir,
+                 c.dev_compute_ts_btn, c.dev_animate_all_html, c.dev_compute_ts_progress],
         api_name=False, show_progress="minimal",
     )
+    # Compute Timestamps button — uses same MFA flow as main tab
+    c.dev_compute_ts_btn.click(
+        fn=compute_mfa_timestamps,
+        inputs=[c.dev_detail_html, c.dev_json_output, c.dev_segment_dir],
+        outputs=[c.dev_detail_html, c.dev_compute_ts_btn, c.dev_animate_all_html,
+                 c.dev_compute_ts_progress, c.dev_json_output],
+        api_name=False, show_progress="hidden",
+    )

src/ui/interface.py CHANGED Viewed

@@ -42,7 +42,7 @@ def build_interface():
         gr.Markdown("""
 - Transcribe and split any recitation by pauses within 1-2 minutes
 - Get precise pause-, verse-, word- and character-level timestamps, exportable as JSON
-- GPU-powered API usage with daily quotas, and unlimited CPU usage
 - Reliable confidence system to flag uncertain segments and missed words — no silent errors
 - Robust tolerance to noise, speaker variation and suboptimal audio quality, particularly with the large model
 - Not intended for incorrect or fragmented recitations; most suited for correct, continuous recitations (repetitions handled)
@@ -54,9 +54,18 @@ def build_interface():
         with gr.Accordion("\U0001f4e1 API Usage", open=False):
             gr.Markdown(_api_doc)
-        with gr.Row(elem_id="main-row"):
-            _build_left_column(c)
-            _build_right_column(c)
         # State components for caching VAD data between runs
         c.cached_speech_intervals = gr.State(value=None)
@@ -206,16 +215,9 @@ def _build_animation_settings(c):
 def _build_right_column(c):
-    """Build the right output column, with optional Dev tab."""
     with gr.Column(scale=RIGHT_COLUMN_SCALE):
-        if DEV_TAB_VISIBLE:
-            with gr.Tabs():
-                with gr.Tab("Results"):
-                    _build_results_content(c)
-                with gr.Tab("Dev"):
-                    _build_dev_tab(c)
-        else:
-            _build_results_content(c)
 def _build_results_content(c):

         gr.Markdown("""
 - Transcribe and split any recitation by pauses within 1-2 minutes
 - Get precise pause-, verse-, word- and character-level timestamps, exportable as JSON
+- GPU-powered [API usage](https://huggingface.co/spaces/hetchyy/Quran-multi-aligner/blob/main/docs/client_api.md) with daily quotas, and unlimited CPU usage
 - Reliable confidence system to flag uncertain segments and missed words — no silent errors
 - Robust tolerance to noise, speaker variation and suboptimal audio quality, particularly with the large model
 - Not intended for incorrect or fragmented recitations; most suited for correct, continuous recitations (repetitions handled)
         with gr.Accordion("\U0001f4e1 API Usage", open=False):
             gr.Markdown(_api_doc)
+        if DEV_TAB_VISIBLE:
+            with gr.Tabs():
+                with gr.Tab("Results"):
+                    with gr.Row(elem_id="main-row"):
+                        _build_left_column(c)
+                        _build_right_column(c)
+                with gr.Tab("Dev"):
+                    _build_dev_tab(c)
+        else:
+            with gr.Row(elem_id="main-row"):
+                _build_left_column(c)
+                _build_right_column(c)
         # State components for caching VAD data between runs
         c.cached_speech_intervals = gr.State(value=None)
 def _build_right_column(c):
+    """Build the right output column."""
     with gr.Column(scale=RIGHT_COLUMN_SCALE):
+        _build_results_content(c)
 def _build_results_content(c):

src/ui/segments.py CHANGED Viewed

@@ -373,7 +373,7 @@ def render_segment_card(seg: SegmentInfo, idx: int, audio_int16: np.ndarray = No
     return html
-def render_segments(segments: list, audio_int16: np.ndarray = None, sample_rate: int = 0, segment_dir: Path = None) -> str:
     """Render all segments as HTML with optional audio players.
     Args:
@@ -390,7 +390,7 @@ def render_segments(segments: list, audio_int16: np.ndarray = None, sample_rate:
     # Write full audio file for unified megacard playback
     full_audio_url = ""
-    if audio_int16 is not None and sample_rate > 0 and segment_dir:
         full_path = segment_dir / "full.wav"
         with wave.open(str(full_path), 'wb') as wf:
             wf.setnchannels(1)

     return html
+def render_segments(segments: list, audio_int16: np.ndarray = None, sample_rate: int = 0, segment_dir: Path = None, skip_full_audio: bool = False) -> str:
     """Render all segments as HTML with optional audio players.
     Args:
     # Write full audio file for unified megacard playback
     full_audio_url = ""
+    if audio_int16 is not None and sample_rate > 0 and segment_dir and not skip_full_audio:
         full_path = segment_dir / "full.wav"
         with wave.open(str(full_path), 'wb') as wf:
             wf.setnchannels(1)