Spaces:

hetchyy
/

quranic-universal-aligner

Running on Zero

hetchyy Claude Opus 4.6 committed on Feb 23

Commit

881307e

1 Parent(s): 0d6804f

Add Dev tab for browsing usage logs and disable verbose debug flags

- Add in-app Dev tab (local only, hidden on HF Space) to browse
hetchyy/quran-aligner-logs dataset with filtering, sorting, and
segment rendering using the same pipeline as the main app
- Disable ANCHOR_DEBUG and PHONEME_ALIGNMENT_DEBUG in production config
- Suppress HF Hub download progress bars on cold start

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (5) hide show

app.py +4 -0
config.py +3 -2
src/ui/dev_tools.py +512 -0
src/ui/event_wiring.py +47 -0
src/ui/interface.py +49 -30

app.py CHANGED Viewed

@@ -1,7 +1,11 @@
 """Quran Aligner — Automatic Quran recitation segmentation and alignment."""
 import sys
 from pathlib import Path
 # Add paths for imports BEFORE importing anything else
 _app_path = Path(__file__).parent.resolve()
 sys.path.insert(0, str(_app_path))

 """Quran Aligner — Automatic Quran recitation segmentation and alignment."""
+import os
 import sys
 from pathlib import Path
+# Suppress HF model download progress bars (hundreds of lines on cold start)
+os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")
 # Add paths for imports BEFORE importing anything else
 _app_path = Path(__file__).parent.resolve()
 sys.path.insert(0, str(_app_path))

config.py CHANGED Viewed

@@ -6,6 +6,7 @@ from pathlib import Path
 # HF Spaces detection
 IS_HF_SPACE = os.environ.get("SPACE_ID") is not None
 # Get project root directory
 PROJECT_ROOT = Path(__file__).parent.absolute()
@@ -145,8 +146,8 @@ MAX_EDIT_DISTANCE_RELAXED = 0.4     # Relaxed threshold for retry tier 2
 MAX_CONSECUTIVE_FAILURES = 2        # Re-anchor within surah after this many DP failures
 # Debug output
-ANCHOR_DEBUG = True                 # Show detailed n-gram voting info (votes, top candidates)
-PHONEME_ALIGNMENT_DEBUG = True      # Show detailed alignment info (R, P, edit costs)
 PHONEME_ALIGNMENT_PROFILING = True  # Track and log timing breakdown (DP, window setup, etc.)
 # =============================================================================

 # HF Spaces detection
 IS_HF_SPACE = os.environ.get("SPACE_ID") is not None
+DEV_TAB_VISIBLE = not IS_HF_SPACE
 # Get project root directory
 PROJECT_ROOT = Path(__file__).parent.absolute()
 MAX_CONSECUTIVE_FAILURES = 2        # Re-anchor within surah after this many DP failures
 # Debug output
+ANCHOR_DEBUG = False                # Show detailed n-gram voting info (votes, top candidates)
+PHONEME_ALIGNMENT_DEBUG = False     # Show detailed alignment info (R, P, edit costs)
 PHONEME_ALIGNMENT_PROFILING = True  # Track and log timing breakdown (DP, window setup, etc.)
 # =============================================================================

src/ui/dev_tools.py ADDED Viewed

	@@ -0,0 +1,512 @@

+"""Dev tab — browse and inspect usage logs from HF dataset (local only)."""
+import json
+import os
+import uuid
+from datetime import datetime, timezone
+from pathlib import Path
+import gradio as gr
+import numpy as np
+from config import SEGMENT_AUDIO_DIR, SURAH_INFO_PATH
+# ── Surah names cache ──────────────────────────────────────────────────
+_surah_names: dict[int, str] | None = None
+def _load_surah_names() -> dict[int, str]:
+    global _surah_names
+    if _surah_names is not None:
+        return _surah_names
+    if not SURAH_INFO_PATH.exists():
+        _surah_names = {}
+        return _surah_names
+    with open(SURAH_INFO_PATH) as f:
+        data = json.load(f)
+    _surah_names = {int(k): v["name_en"] for k, v in data.items()}
+    return _surah_names
+# ── HF token loading (same pattern as scripts/analyze_logs.py) ─────────
+def _load_token() -> str | None:
+    token = os.environ.get("HF_TOKEN")
+    if token:
+        return token
+    env_path = Path(__file__).parent.parent.parent / ".env"
+    if env_path.exists():
+        for line in env_path.read_text().splitlines():
+            line = line.strip()
+            if line.startswith("HF_TOKEN="):
+                return line.split("=", 1)[1]
+    return None
+# ── Dataset helpers ────────────────────────────────────────────────────
+def _has_valid_segments(segments_str) -> bool:
+    if not segments_str:
+        return False
+    try:
+        runs = json.loads(segments_str)
+        if isinstance(runs, list) and runs:
+            return any(isinstance(run, dict) and run.get("segments") for run in runs)
+    except (json.JSONDecodeError, TypeError):
+        pass
+    return False
+def _fmt_duration(seconds) -> str:
+    if seconds is None:
+        return "N/A"
+    m, s = divmod(int(seconds), 60)
+    h, m = divmod(m, 60)
+    if h > 0:
+        return f"{h}h {m}m"
+    return f"{m}m {int(s)}s"
+def _fmt_pct(val) -> str:
+    if val is None:
+        return "N/A"
+    return f"{val * 100:.1f}%"
+def _fmt_time(val) -> str:
+    if val is None:
+        return "N/A"
+    return f"{val:.1f}s"
+# ── UI builder ─────────────────────────────────────────────────────────
+def build_dev_tab_ui(c):
+    """Build the Dev tab UI components and attach them to the namespace.
+
+    Every component is stored as a ``dev_*`` attribute on ``c`` so that the
+    event-wiring code can reference it later. Nothing is returned.
+    """
+    # Top row: load/refresh actions plus a status line.
+    with gr.Row():
+        c.dev_load_btn = gr.Button("Load Logs", variant="primary", size="sm")
+        c.dev_refresh_btn = gr.Button("Refresh", size="sm")
+        c.dev_status = gr.Markdown("Click **Load Logs** to stream metadata from HF dataset.")
+    # Second row: filter and sort controls, all defaulting to "no filtering".
+    with gr.Row():
+        c.dev_filter_device = gr.Dropdown(
+            choices=["All", "GPU", "CPU"], value="All", label="Device", scale=1,
+        )
+        c.dev_filter_model = gr.Dropdown(
+            choices=["All", "Base", "Large"], value="All", label="Model", scale=1,
+        )
+        c.dev_filter_status = gr.Dropdown(
+            choices=["All", "All Passed", "Has Failures"], value="All", label="Status", scale=1,
+        )
+        c.dev_sort = gr.Dropdown(
+            choices=["Newest", "Duration", "Failures"], value="Newest", label="Sort", scale=1,
+        )
+    # Read-only table; the 12 headers line up with the 12 values produced by
+    # _build_table_row.
+    c.dev_table = gr.Dataframe(
+        headers=["#", "Time", "Surah", "Duration", "Segs", "Model", "Device",
+                 "Passed", "Failed", "Conf", "T1", "T2"],
+        datatype=["number", "str", "str", "str", "number", "str", "str",
+                  "number", "number", "str", "number", "number"],
+        interactive=False,
+        label="Usage Logs",
+        wrap=True,
+    )
+    # Detail pane filled in when a table row is selected.
+    c.dev_detail_html = gr.HTML(value="", label="Log Detail")
+    # State: the loaded rows, and the mapping from displayed table position
+    # to an index into those rows.
+    c.dev_all_rows = gr.State(value=[])
+    c.dev_filtered_indices = gr.State(value=[])
+# ── Row extraction ─────────────────────────────────────────────────────
+def _row_to_dict(row) -> dict:
+    """Extract the fields we care about from a dataset row."""
+    return {
+        "audio_id": row.get("audio_id", ""),
+        "timestamp": row.get("timestamp", ""),
+        "surah": row.get("surah"),
+        "audio_duration_s": row.get("audio_duration_s"),
+        "num_segments": row.get("num_segments"),
+        "asr_model": row.get("asr_model", ""),
+        "device": row.get("device", ""),
+        "segments_passed": row.get("segments_passed"),
+        "segments_failed": row.get("segments_failed"),
+        "mean_confidence": row.get("mean_confidence"),
+        "tier1_retries": row.get("tier1_retries", 0) or 0,
+        "tier1_passed": row.get("tier1_passed", 0) or 0,
+        "tier2_retries": row.get("tier2_retries", 0) or 0,
+        "tier2_passed": row.get("tier2_passed", 0) or 0,
+        "reanchors": row.get("reanchors", 0) or 0,
+        "special_merges": row.get("special_merges", 0) or 0,
+        "total_time": row.get("total_time"),
+        "vad_queue_time": row.get("vad_queue_time"),
+        "vad_gpu_time": row.get("vad_gpu_time"),
+        "asr_gpu_time": row.get("asr_gpu_time"),
+        "dp_total_time": row.get("dp_total_time"),
+        "min_silence_ms": row.get("min_silence_ms"),
+        "min_speech_ms": row.get("min_speech_ms"),
+        "pad_ms": row.get("pad_ms"),
+        "segments": row.get("segments"),
+        "resegmented": row.get("resegmented"),
+        "retranscribed": row.get("retranscribed"),
+        "error": row.get("error"),
+    }
+# ── Table building ─────────────────────────────────────────────────────
+def _build_table_row(row_dict, index, surah_names):
+    """Build a single table row list from a row dict."""
+    ts = row_dict.get("timestamp", "")
+    try:
+        dt = datetime.fromisoformat(ts)
+        time_display = dt.strftime("%m-%d %H:%M")
+    except (ValueError, TypeError):
+        time_display = str(ts)[:16] if ts else "N/A"
+    surah = row_dict.get("surah")
+    name = surah_names.get(surah, "") if surah else ""
+    surah_display = f"{surah} {name}" if name else str(surah or "?")
+    return [
+        index + 1,
+        time_display,
+        surah_display,
+        _fmt_duration(row_dict.get("audio_duration_s")),
+        row_dict.get("num_segments") or 0,
+        row_dict.get("asr_model", "?"),
+        row_dict.get("device", "?"),
+        row_dict.get("segments_passed") or 0,
+        row_dict.get("segments_failed") or 0,
+        _fmt_pct(row_dict.get("mean_confidence")),
+        row_dict.get("tier1_retries", 0) or 0,
+        row_dict.get("tier2_retries", 0) or 0,
+    ]
+def _build_table(rows, indices, surah_names):
+    """Build table data from rows and their display indices."""
+    return [_build_table_row(rows[i], display_idx, surah_names)
+            for display_idx, i in enumerate(indices)]
+# ── Handlers ───────────────────────────────────────────────────────────
+def load_logs_handler():
+    """Stream dataset (no audio) and return rows + table."""
+    token = _load_token()
+    if not token:
+        gr.Warning("HF_TOKEN not found in .env or environment.")
+        return [], [], "HF_TOKEN not found.", gr.update()
+    try:
+        from datasets import load_dataset
+    except ImportError:
+        gr.Warning("'datasets' package not installed.")
+        return [], [], "'datasets' package not installed.", gr.update()
+    surah_names = _load_surah_names()
+    try:
+        ds = load_dataset("hetchyy/quran-aligner-logs", token=token,
+                          split="train", streaming=True)
+        ds = ds.remove_columns("audio")
+    except Exception as e:
+        gr.Warning(f"Failed to load dataset: {e}")
+        return [], [], f"Error: {e}", gr.update()
+    rows = []
+    total = 0
+    for row in ds:
+        total += 1
+        if _has_valid_segments(row.get("segments")):
+            rows.append(_row_to_dict(row))
+    # Sort newest first
+    rows.sort(key=lambda r: r.get("timestamp") or "", reverse=True)
+    indices = list(range(len(rows)))
+    table_data = _build_table(rows, indices, surah_names)
+    status = f"Loaded {len(rows)} rows with segments (out of {total} total)."
+    return rows, indices, status, table_data
+def filter_and_sort_handler(all_rows, device, model, status_filter, sort_by):
+    """Filter and sort cached rows, return new table + index mapping."""
+    if not all_rows:
+        return [], gr.update()
+    surah_names = _load_surah_names()
+    indices = []
+    for i, row in enumerate(all_rows):
+        # Device filter
+        if device != "All":
+            row_device = (row.get("device") or "").lower()
+            if device == "GPU" and row_device not in ("cuda", "gpu"):
+                continue
+            if device == "CPU" and row_device not in ("cpu",):
+                continue
+        # Model filter
+        if model != "All":
+            row_model = row.get("asr_model", "")
+            if model == "Base" and row_model != "Base":
+                continue
+            if model == "Large" and row_model != "Large":
+                continue
+        # Status filter
+        if status_filter == "All Passed":
+            if (row.get("segments_failed") or 0) > 0:
+                continue
+        elif status_filter == "Has Failures":
+            if (row.get("segments_failed") or 0) == 0:
+                continue
+        indices.append(i)
+    # Sort
+    if sort_by == "Duration":
+        indices.sort(key=lambda i: all_rows[i].get("audio_duration_s") or 0, reverse=True)
+    elif sort_by == "Failures":
+        indices.sort(key=lambda i: all_rows[i].get("segments_failed") or 0, reverse=True)
+    # else "Newest" — already sorted by timestamp from load
+    table_data = _build_table(all_rows, indices, surah_names)
+    return indices, table_data
+def select_log_row_handler(all_rows, filtered_indices, evt: gr.SelectData):
+    """When a table row is clicked, download audio and render segments."""
+    if not all_rows or not filtered_indices:
+        return ""
+    display_idx = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
+    if display_idx < 0 or display_idx >= len(filtered_indices):
+        return ""
+    row_idx = filtered_indices[display_idx]
+    row = all_rows[row_idx]
+    audio_id = row.get("audio_id", "")
+    surah_names = _load_surah_names()
+    # Build summary HTML
+    summary_html = _build_summary_html(row, surah_names)
+    # Try to reconstruct and render segments
+    segments_html = _build_segments_from_log(row, audio_id)
+    return summary_html + segments_html
+# ── Summary HTML builder ───────────────────────────────────────────────
+def _build_summary_html(row, surah_names) -> str:
+    """Build the 4-section summary HTML for a log row."""
+    surah = row.get("surah")
+    name = surah_names.get(surah, "") if surah else ""
+    surah_display = f"{surah} ({name})" if name else str(surah or "N/A")
+    sections = []
+    # 1. Summary
+    sections.append(f"""
+    <div style="margin-bottom: 12px; padding: 10px; background: #f8f9fa; border-radius: 6px; border-left: 3px solid #4a9eff;">
+        <strong>Summary</strong><br>
+        <span>Surah: {surah_display}</span> &nbsp;|&nbsp;
+        <span>Duration: {_fmt_duration(row.get('audio_duration_s'))}</span> &nbsp;|&nbsp;
+        <span>Segments: {row.get('num_segments', 'N/A')}</span> &nbsp;|&nbsp;
+        <span>Audio ID: <code style="font-size: 0.85em;">{row.get('audio_id', 'N/A')}</code></span>
+    </div>
+    """)
+    # 2. Settings
+    sections.append(f"""
+    <div style="margin-bottom: 12px; padding: 10px; background: #f8f9fa; border-radius: 6px; border-left: 3px solid #f0ad4e;">
+        <strong>Settings</strong><br>
+        <span>Min Silence: {row.get('min_silence_ms', 'N/A')} ms</span> &nbsp;|&nbsp;
+        <span>Min Speech: {row.get('min_speech_ms', 'N/A')} ms</span> &nbsp;|&nbsp;
+        <span>Pad: {row.get('pad_ms', 'N/A')} ms</span> &nbsp;|&nbsp;
+        <span>Model: {row.get('asr_model', 'N/A')}</span> &nbsp;|&nbsp;
+        <span>Device: {row.get('device', 'N/A')}</span>
+    </div>
+    """)
+    # 3. Profiling
+    sections.append(f"""
+    <div style="margin-bottom: 12px; padding: 10px; background: #f8f9fa; border-radius: 6px; border-left: 3px solid #5cb85c;">
+        <strong>Profiling</strong><br>
+        <span>Total: {_fmt_time(row.get('total_time'))}</span> &nbsp;|&nbsp;
+        <span>VAD Queue: {_fmt_time(row.get('vad_queue_time'))}</span> &nbsp;|&nbsp;
+        <span>VAD GPU: {_fmt_time(row.get('vad_gpu_time'))}</span> &nbsp;|&nbsp;
+        <span>ASR GPU: {_fmt_time(row.get('asr_gpu_time'))}</span> &nbsp;|&nbsp;
+        <span>DP: {_fmt_time(row.get('dp_total_time'))}</span>
+    </div>
+    """)
+    # 4. Quality
+    passed = row.get("segments_passed") or 0
+    failed = row.get("segments_failed") or 0
+    total_segs = passed + failed
+    pass_rate = f"{passed}/{total_segs}" if total_segs else "N/A"
+    t1 = f"{row.get('tier1_passed', 0) or 0}/{row.get('tier1_retries', 0) or 0}"
+    t2 = f"{row.get('tier2_passed', 0) or 0}/{row.get('tier2_retries', 0) or 0}"
+    flags = []
+    if row.get("resegmented"):
+        flags.append("Resegmented")
+    if row.get("retranscribed"):
+        flags.append("Retranscribed")
+    if row.get("error"):
+        flags.append(f"Error: {str(row['error'])[:60]}")
+    flags_html = f" &nbsp;|&nbsp; <span>Flags: {', '.join(flags)}</span>" if flags else ""
+    sections.append(f"""
+    <div style="margin-bottom: 12px; padding: 10px; background: #f8f9fa; border-radius: 6px; border-left: 3px solid #d9534f;">
+        <strong>Quality</strong><br>
+        <span>Passed: {pass_rate}</span> &nbsp;|&nbsp;
+        <span>Confidence: {_fmt_pct(row.get('mean_confidence'))}</span> &nbsp;|&nbsp;
+        <span>T1 retries: {t1}</span> &nbsp;|&nbsp;
+        <span>T2 retries: {t2}</span> &nbsp;|&nbsp;
+        <span>Reanchors: {row.get('reanchors', 0) or 0}</span>
+        {flags_html}
+    </div>
+    """)
+    return "\n".join(sections)
+# ── Segment reconstruction from log ───────────────────────────────────
+def _build_segments_from_log(row, audio_id) -> str:
+    """Build segment cards from the log's segments JSON, downloading audio on demand."""
+    segments_str = row.get("segments")
+    if not segments_str:
+        return '<div style="color: #999; padding: 20px;">No segment data in this log row.</div>'
+    try:
+        runs = json.loads(segments_str)
+    except (json.JSONDecodeError, TypeError):
+        return '<div style="color: #999; padding: 20px;">Could not parse segments JSON.</div>'
+    if not runs or not isinstance(runs, list):
+        return '<div style="color: #999; padding: 20px;">Empty segment runs.</div>'
+    # Use the last run (most recent alignment pass)
+    last_run = runs[-1]
+    seg_list = last_run.get("segments", [])
+    if not seg_list:
+        return '<div style="color: #999; padding: 20px;">No segments in last run.</div>'
+    # Try to download audio for this specific row
+    audio_int16 = None
+    sample_rate = 16000
+    segment_dir = None
+    try:
+        audio_int16, sample_rate, segment_dir = _download_audio_for_row(audio_id)
+    except Exception as e:
+        print(f"[dev_tools] Audio download failed: {e}")
+    # Build SegmentInfo objects and render
+    from src.core.segment_types import SegmentInfo
+    from src.alignment.special_segments import ALL_SPECIAL_REFS, SPECIAL_TEXT
+    from src.ui.segments import render_segments, get_text_with_markers, check_undersegmented
+    segments = []
+    for seg_data in seg_list:
+        ref = seg_data.get("ref", "")
+        confidence = seg_data.get("confidence", 0.0) or 0.0
+        start = seg_data.get("start", 0.0) or 0.0
+        end = seg_data.get("end", 0.0) or 0.0
+        error = seg_data.get("error")
+        special_type = seg_data.get("special_type", "")
+        duration = end - start
+        # Reconstruct matched_text
+        matched_text = ""
+        if ref in ALL_SPECIAL_REFS:
+            # For known specials, use the constant text
+            if ref in SPECIAL_TEXT:
+                matched_text = SPECIAL_TEXT[ref]
+            elif ref == "Isti'adha+Basmala":
+                matched_text = SPECIAL_TEXT["Isti'adha"] + " \u06dd " + SPECIAL_TEXT["Basmala"]
+        elif ref:
+            matched_text = get_text_with_markers(ref) or ""
+        # Check for undersegmentation
+        underseg = False
+        if ref and ref not in ALL_SPECIAL_REFS:
+            underseg = check_undersegmented(ref, duration)
+        # Check for missing words
+        has_missing = seg_data.get("missing_words", False) or False
+        seg_info = SegmentInfo(
+            start_time=start,
+            end_time=end,
+            transcribed_text="",
+            matched_text=matched_text,
+            matched_ref=ref,
+            match_score=confidence,
+            error=error,
+            has_missing_words=has_missing,
+            potentially_undersegmented=underseg,
+        )
+        segments.append(seg_info)
+    if not segments:
+        return '<div style="color: #999; padding: 20px;">No valid segments to display.</div>'
+    return render_segments(segments, audio_int16=audio_int16, sample_rate=sample_rate,
+                           segment_dir=segment_dir)
+def _download_audio_for_row(audio_id: str):
+    """Download audio for a specific row by streaming until audio_id matches.
+    Returns (audio_int16, sample_rate, segment_dir) or raises on failure.
+    """
+    token = _load_token()
+    if not token:
+        raise ValueError("No HF token")
+    from datasets import load_dataset
+    import librosa
+    ds = load_dataset("hetchyy/quran-aligner-logs", token=token,
+                      split="train", streaming=True)
+    for row in ds:
+        if row.get("audio_id") == audio_id:
+            audio_data = row.get("audio")
+            if audio_data is None:
+                raise ValueError("Row found but audio is None")
+            # HF Audio column returns {"path": ..., "array": np.array, "sampling_rate": int}
+            audio_array = audio_data["array"]
+            sr = audio_data["sampling_rate"]
+            # Resample to 16kHz if needed
+            if sr != 16000:
+                audio_array = librosa.resample(audio_array, orig_sr=sr, target_sr=16000)
+                sr = 16000
+            # Convert to int16
+            audio_float = np.clip(audio_array, -1.0, 1.0)
+            audio_int16 = (audio_float * 32767).astype(np.int16)
+            # Create segment directory
+            segment_dir = SEGMENT_AUDIO_DIR / f"dev_{uuid.uuid4().hex[:8]}"
+            segment_dir.mkdir(parents=True, exist_ok=True)
+            return audio_int16, sr, segment_dir
+    raise ValueError(f"Audio ID '{audio_id}' not found in dataset")

src/ui/event_wiring.py CHANGED Viewed

@@ -1,6 +1,7 @@
 """Event wiring — connects all Gradio component events."""
 import gradio as gr
 from src.core.zero_gpu import QuotaExhaustedError
 from src.pipeline import (
     process_audio, resegment_audio,
@@ -34,6 +35,8 @@ def wire_events(app, c):
     _wire_animation_settings(c)
     _wire_settings_restoration(app, c)
     _wire_api_endpoint(c)
 def _wire_preset_buttons(c):
@@ -496,3 +499,47 @@ def _wire_api_endpoint(c):
         outputs=[c.api_result],
         api_name="mfa_timestamps_direct",
     )

 """Event wiring — connects all Gradio component events."""
 import gradio as gr
+from config import DEV_TAB_VISIBLE
 from src.core.zero_gpu import QuotaExhaustedError
 from src.pipeline import (
     process_audio, resegment_audio,
     _wire_animation_settings(c)
     _wire_settings_restoration(app, c)
     _wire_api_endpoint(c)
+    if DEV_TAB_VISIBLE:
+        _wire_dev_tab(c)
 def _wire_preset_buttons(c):
         outputs=[c.api_result],
         api_name="mfa_timestamps_direct",
     )
+def _wire_dev_tab(c):
+    """Wire dev tab event handlers."""
+    from src.ui.dev_tools import (
+        load_logs_handler, filter_and_sort_handler, select_log_row_handler,
+    )
+    # Load / Refresh buttons
+    _load_outputs = [c.dev_all_rows, c.dev_filtered_indices, c.dev_status, c.dev_table]
+    c.dev_load_btn.click(
+        fn=load_logs_handler,
+        inputs=[],
+        outputs=_load_outputs,
+        api_name=False, show_progress="minimal",
+    )
+    c.dev_refresh_btn.click(
+        fn=load_logs_handler,
+        inputs=[],
+        outputs=_load_outputs,
+        api_name=False, show_progress="minimal",
+    )
+    # Filter / Sort changes
+    _filter_inputs = [c.dev_all_rows, c.dev_filter_device, c.dev_filter_model,
+                      c.dev_filter_status, c.dev_sort]
+    _filter_outputs = [c.dev_filtered_indices, c.dev_table]
+    for component in [c.dev_filter_device, c.dev_filter_model, c.dev_filter_status, c.dev_sort]:
+        component.change(
+            fn=filter_and_sort_handler,
+            inputs=_filter_inputs,
+            outputs=_filter_outputs,
+            api_name=False, show_progress="hidden",
+        )
+    # Table row selection
+    c.dev_table.select(
+        fn=select_log_row_handler,
+        inputs=[c.dev_all_rows, c.dev_filtered_indices],
+        outputs=[c.dev_detail_html],
+        api_name=False, show_progress="minimal",
+    )

src/ui/interface.py CHANGED Viewed

@@ -7,6 +7,7 @@ import gradio as gr
 from config import (
     DELETE_CACHE_FREQUENCY, DELETE_CACHE_AGE,
     ANIM_WORD_COLOR, ANIM_STYLE_ROW_SCALES,
     ANIM_DISPLAY_MODES, ANIM_DISPLAY_MODE_DEFAULT,
     ANIM_OPACITY_PREV_DEFAULT, ANIM_OPACITY_AFTER_DEFAULT, ANIM_OPACITY_STEP,
@@ -205,36 +206,54 @@ def _build_animation_settings(c):
 def _build_right_column(c):
-    """Build the right output column."""
     with gr.Column(scale=RIGHT_COLUMN_SCALE):
-        c.extract_btn = gr.Button("Extract Segments", variant="primary", size="lg")
-        with gr.Row(elem_id="action-btns-row"):
-            c.resegment_toggle_btn = gr.Button(
-                "Resegment with New Settings", variant="primary", size="lg", visible=False
-            )
-            c.retranscribe_btn = gr.Button(
-                "Retranscribe with Large Model", variant="primary", size="lg", visible=False
-            )
-        with gr.Row(elem_id="ts-row"):
-            c.compute_ts_btn = gr.Button(
-                "Compute Timestamps", variant="secondary", size="lg", interactive=False, visible=False
-            )
-            c.compute_ts_progress = gr.HTML(value="", visible=False)
-            c.animate_all_html = gr.HTML(value="", visible=False)
-        with gr.Column(visible=False) as c.resegment_panel:
-            gr.Markdown(
-                "Uses cached data, skipping the heavy computation, "
-                "so it's much faster. Useful if results are over-segmented "
-                "or under-segmented"
-            )
-            c.rs_silence, c.rs_speech, c.rs_pad, \
-                c.rs_btn_muj, c.rs_btn_mur, c.rs_btn_fast = create_segmentation_settings(id_suffix="-rs")
-            c.resegment_btn = gr.Button("Resegment", variant="primary", size="lg")
-        c.output_html = gr.HTML(
-            value='<div style="text-align: center; color: #666; padding: 60px;">Upload audio and click "Extract Segments" to begin</div>',
-            elem_classes=["output-html"]
         )
-        # Hidden JSON output for API consumers
-        c.output_json = gr.JSON(visible=False, label="JSON Output")

 from config import (
     DELETE_CACHE_FREQUENCY, DELETE_CACHE_AGE,
+    DEV_TAB_VISIBLE,
     ANIM_WORD_COLOR, ANIM_STYLE_ROW_SCALES,
     ANIM_DISPLAY_MODES, ANIM_DISPLAY_MODE_DEFAULT,
     ANIM_OPACITY_PREV_DEFAULT, ANIM_OPACITY_AFTER_DEFAULT, ANIM_OPACITY_STEP,
 def _build_right_column(c):
+    """Build the right output column, with optional Dev tab."""
     with gr.Column(scale=RIGHT_COLUMN_SCALE):
+        if DEV_TAB_VISIBLE:
+            with gr.Tabs():
+                with gr.Tab("Results"):
+                    _build_results_content(c)
+                with gr.Tab("Dev"):
+                    _build_dev_tab(c)
+        else:
+            _build_results_content(c)
+def _build_results_content(c):
+    """Build the main results content (extract/resegment/output).
+
+    Components are created in display order and stored as attributes on
+    ``c``. Everything except the extract button and the output HTML starts
+    hidden.
+    """
+    c.extract_btn = gr.Button("Extract Segments", variant="primary", size="lg")
+    # Follow-up actions, created hidden.
+    with gr.Row(elem_id="action-btns-row"):
+        c.resegment_toggle_btn = gr.Button(
+            "Resegment with New Settings", variant="primary", size="lg", visible=False
+        )
+        c.retranscribe_btn = gr.Button(
+            "Retranscribe with Large Model", variant="primary", size="lg", visible=False
+        )
+    # Timestamp computation row, created hidden.
+    with gr.Row(elem_id="ts-row"):
+        c.compute_ts_btn = gr.Button(
+            "Compute Timestamps", variant="secondary", size="lg", interactive=False, visible=False
+        )
+        c.compute_ts_progress = gr.HTML(value="", visible=False)
+        c.animate_all_html = gr.HTML(value="", visible=False)
+    # Resegment panel with its own copy of the segmentation settings
+    # (the "-rs" id suffix distinguishes it from any other instance).
+    with gr.Column(visible=False) as c.resegment_panel:
+        gr.Markdown(
+            "Uses cached data, skipping the heavy computation, "
+            "so it's much faster. Useful if results are over-segmented "
+            "or under-segmented"
         )
+        c.rs_silence, c.rs_speech, c.rs_pad, \
+            c.rs_btn_muj, c.rs_btn_mur, c.rs_btn_fast = create_segmentation_settings(id_suffix="-rs")
+        c.resegment_btn = gr.Button("Resegment", variant="primary", size="lg")
+    # Placeholder shown until results are produced.
+    c.output_html = gr.HTML(
+        value='<div style="text-align: center; color: #666; padding: 60px;">Upload audio and click "Extract Segments" to begin</div>',
+        elem_classes=["output-html"]
+    )
+    # Hidden JSON output for API consumers
+    c.output_json = gr.JSON(visible=False, label="JSON Output")
+def _build_dev_tab(c):
+    """Build the Dev tab UI (delegates to dev_tools module)."""
+    # Imported here rather than at module level, so src.ui.dev_tools is only
+    # loaded when this function is actually called.
+    from src.ui.dev_tools import build_dev_tab_ui
+    build_dev_tab_ui(c)