Spaces:

aankitdas
/

tts-eval-framework

Sleeping

App Files Files Community

aankitdas committed on Apr 14

Commit

78dff9d

1 Parent(s): 29ebcc1

added supabase stuff - now enjoy history

Browse files

Files changed (1) hide show

app/app.py +119 -79

app/app.py CHANGED Viewed

@@ -46,7 +46,7 @@ KOKORO_BAND_VOICE = {
 # ── state ─────────────────────────────────────────────────────────────────────
 _session_results: list[dict] = []
 # ── helpers ───────────────────────────────────────────────────────────────────
 def format_wer(wer):
@@ -77,15 +77,13 @@ def format_cost(engine_cost, chirp_cost, engine_name=""):
 def build_comparison_table(results: list[dict]) -> pd.DataFrame:
     columns = [
         "Engine",
-        "Type",
-        "Production",
         "Voice",
-        "UTMOS ↑ (1-5, naturalness)",
-        "WER ↓ (% words wrong, intelligibility)",
-        "RTF ↓ (synth time / audio dur, <1.0 = fast)",
         "Latency (s)",
         "Cost",
-        "Audio URL"
     ]
     if not results:
         return pd.DataFrame(columns=columns)
@@ -93,16 +91,14 @@ def build_comparison_table(results: list[dict]) -> pd.DataFrame:
     rows = []
     for r in results:
         rows.append({
-            "Engine":                                          r["engine"],
-            "Type":                                            r["engine_type"],
-            "Production":                                      "✓" if r["production_ready"] else "✗ baseline",
-            "Voice":                                           r.get("voice", "—"),
-            "UTMOS ↑ (1-5, naturalness)":                     format_utmos(r["utmos"]),
-            "WER ↓ (% words wrong, intelligibility)":         format_wer(r["wer"]),
-            "RTF ↓ (synth time / audio dur, <1.0 = fast)":   format_rtf(r["rtf"]),
-            "Latency (s)":                                     r["latency_s"],
-            "Cost":                                            format_cost(r["engine_cost_usd"], r["chirp_equiv_usd"], r["engine"]),
-            "Audio URL": r.get("audio_url") or ""
         })
     return pd.DataFrame(rows)
@@ -112,8 +108,9 @@ def build_business_chart(results: list[dict]):
     Bubble chart for business decision making.
     X = RTF (speed, lower = better)
     Y = UTMOS (quality, higher = better)
-    Bubble size = cost (larger = more expensive)
     Color = engine type
     """
     import plotly.graph_objects as go
@@ -125,7 +122,6 @@ def build_business_chart(results: list[dict]):
         )
         return fig
-    # parse RTF float from formatted string
     def parse_rtf(rtf_str):
         if rtf_str is None or rtf_str == "N/A":
             return None
@@ -134,71 +130,49 @@ def build_business_chart(results: list[dict]):
         except Exception:
             return None
-    # parse UTMOS float from formatted string
     def parse_utmos(utmos_str):
-        if utmos_str is None or utmos_str == "N/A (mp3)":
             return None
         try:
             return float(str(utmos_str).split(" ")[0])
         except Exception:
             return None
-    # parse cost float from formatted string
-    def parse_cost(cost_str):
-        if not cost_str:
-            return 0.0
-        try:
-            # extract first dollar amount
-            import re
-            match = re.search(r"\$([\d.]+)", str(cost_str))
-            return float(match.group(1)) if match else 0.0
-        except Exception:
-            return 0.0
     color_map = {
-        "neural-local":       "#2ecc71",   # green
-        "neural-cloud-free":  "#3498db",   # blue
-        "neural-cloud-paid":  "#e74c3c",   # red
-        "rule-based-local":   "#95a5a6",   # gray
     }
-    # build table from results
-    df = build_comparison_table(results)
     traces = {}
-    for _, row in df.iterrows():
-        rtf = parse_rtf(row["RTF ↓ (synth time / audio dur, <1.0 = fast)"])
-        utmos = parse_utmos(row["UTMOS ↑ (1-5, naturalness)"])
-        cost = parse_cost(row["Cost"])
         if rtf is None or utmos is None:
             continue
-        engine_name = row["Engine"]
-        engine_type = row["Type"]
-        voice = row["Voice"]
-        latency = row["Latency (s)"]
-        wer = row["WER ↓ (% words wrong, intelligibility)"]
-        production = row["Production"]
         color = color_map.get(engine_type, "#bdc3c7")
-        # bubble size: min size 15, scale with cost
-        size = 20 #max(15, cost * 5000 + 15)
         hover = (
             f"<b>{engine_name}</b><br>"
             f"Voice: {voice}<br>"
             f"UTMOS: {utmos:.3f}<br>"
             f"RTF: {rtf:.3f}x<br>"
-            f"WER: {wer}<br>"
             f"Latency: {latency}s<br>"
-            f"Cost: {row['Cost']}<br>"
             f"Production: {production}"
         )
-        label = f"{engine_name}<br>({voice})"
         if engine_type not in traces:
             traces[engine_type] = {
                 "x": [], "y": [], "sizes": [],
@@ -208,9 +182,11 @@ def build_business_chart(results: list[dict]):
         traces[engine_type]["x"].append(rtf)
         traces[engine_type]["y"].append(utmos)
         traces[engine_type]["sizes"].append(size)
         traces[engine_type]["hovers"].append(hover)
-        traces[engine_type]["labels"].append(label)
     fig = go.Figure()
@@ -231,7 +207,6 @@ def build_business_chart(results: list[dict]):
             hoverinfo="text",
         ))
-    # add quadrant lines at RTF=1.0 and UTMOS=4.0
     fig.add_vline(
         x=1.0, line_dash="dash", line_color="rgba(255,255,255,0.4)", opacity=0.8,
         annotation_text="RTF = 1.0",
@@ -245,7 +220,6 @@ def build_business_chart(results: list[dict]):
         annotation_position="right",
     )
-    # annotate ideal quadrant
     fig.add_annotation(
         x=0.1, y=4.9,
         text="✓ Ideal zone<br>(fast + high quality)",
@@ -307,20 +281,48 @@ def _make_audio_filename(engine_name: str, band: str, ext: str) -> str:
 # ── event handlers ────────────────────────────────────────────────────────────
-def on_row_select(evt: gr.SelectData, results_df: pd.DataFrame) -> tuple:
     """
-    When a row is selected, pass the Supabase public URL directly
-    to gr.Audio value — Gradio fetches it internally.
     """
     try:
         row_idx = evt.index[0]
-        url = results_df.iloc[row_idx]["Audio URL"]
-        if not url or not str(url).startswith("http"):
-            return gr.update(visible=False)
-        return gr.update(value=url, visible=True)
     except Exception as e:
-        print(f"[Playback] Failed to load audio: {e}")
-        return gr.update(visible=False)
 def on_engine_change(engine_name: str):
     """Show voice dropdown only for Kokoro."""
@@ -389,12 +391,33 @@ def run_synthesis(engine_name: str, band: str, text: str, voice: str):
         if url:
             eval_result["audio_url"] = url
             print(f"[Storage] Uploaded: {url}")
         else:
             eval_result["audio_url"] = None
     upload_audio_background(audio_path, bucket_filename, callback=_on_upload)
     eval_result["audio_url"] = None  # placeholder until upload completes
     _session_results.append(eval_result)
     status = (
         f"✓ Done — "
@@ -407,6 +430,7 @@ def run_synthesis(engine_name: str, band: str, text: str, voice: str):
 def clear_results():
     _session_results.clear()
     return build_comparison_table(_session_results), build_business_chart(_session_results), "Results cleared."
@@ -425,6 +449,8 @@ def export_all():
     return gr.update(value=_EVAL_LOG_PATH, visible=True), "✓ Full history log ready to download."
 def load_history():
     # try Supabase first, fall back to local CSV
     try:
         from storage import download_csv
@@ -435,11 +461,18 @@ def load_history():
     if not os.path.exists(_EVAL_LOG_PATH):
         return build_comparison_table([]), build_business_chart([]), "⚠ No history found."
     try:
-        df = pd.read_csv(_EVAL_LOG_PATH)
-        # fill missing audio_url column for old rows that predate storage
         if "audio_url" not in df.columns:
             df["audio_url"] = ""
         records = df.to_dict(orient="records")
         return build_comparison_table(records), build_business_chart(records), f"✓ Loaded {len(records)} historical runs."
     except Exception as e:
         return build_comparison_table([]), build_business_chart([]), f"✗ Failed: {e}"
@@ -502,11 +535,18 @@ def build_ui():
             interactive=False,
         )
-        row_audio_player = gr.Audio(
-            label="Selected Row Audio",
-            visible=False,
-            type="filepath",
-        )
         business_chart = gr.Plot(
             value=build_business_chart([]),
@@ -554,8 +594,8 @@ def build_ui():
         )
         comparison_table.select(
             fn=on_row_select,
-            inputs=[comparison_table],
-            outputs=[row_audio_player],
         )
         load_history_btn.click(

 # ── state ─────────────────────────────────────────────────────────────────────
 _session_results: list[dict] = []
+_session_audio_urls: list[str] = []
 # ── helpers ───────────────────────────────────────────────────────────────────
 def format_wer(wer):
 def build_comparison_table(results: list[dict]) -> pd.DataFrame:
     columns = [
         "Engine",
+        "Band",
         "Voice",
+        "UTMOS ↑",
+        "WER ↓",
+        "RTF ↓",
         "Latency (s)",
         "Cost",
     ]
     if not results:
         return pd.DataFrame(columns=columns)
     rows = []
     for r in results:
         rows.append({
+            "Engine":       r["engine"],
+            "Band":         r["band"],
+            "Voice":        r.get("voice", "—"),
+            "UTMOS ↑":      format_utmos(r["utmos"]),
+            "WER ↓":        format_wer(r["wer"]),
+            "RTF ↓":        format_rtf(r["rtf"]),
+            "Latency (s)":  r["latency_s"],
+            "Cost":         format_cost(r["engine_cost_usd"], r["chirp_equiv_usd"], r["engine"]),
         })
     return pd.DataFrame(rows)
     Bubble chart for business decision making.
     X = RTF (speed, lower = better)
     Y = UTMOS (quality, higher = better)
+    Bubble size = 20 plus a cost-scaled bonus (engine_cost_usd * 2000, capped at 25)
     Color = engine type
+    Reads directly from results dicts — no dependency on display column names.
     """
     import plotly.graph_objects as go
         )
         return fig
     def parse_rtf(rtf_str):
         if rtf_str is None or rtf_str == "N/A":
             return None
         except Exception:
             return None
     def parse_utmos(utmos_str):
+        if utmos_str is None or utmos_str == "N/A":
             return None
         try:
             return float(str(utmos_str).split(" ")[0])
         except Exception:
             return None
     color_map = {
+        "neural-local":      "#2ecc71",
+        "neural-cloud-free": "#3498db",
+        "neural-cloud-paid": "#e74c3c",
+        "rule-based-local":  "#95a5a6",
     }
     traces = {}
+    for r in results:
+        rtf = parse_rtf(format_rtf(r.get("rtf")))
+        utmos = parse_utmos(format_utmos(r.get("utmos")))
         if rtf is None or utmos is None:
             continue
+        engine_name = r["engine"]
+        engine_type = r.get("engine_type", "neural-local")
+        voice = r.get("voice", "—")
+        latency = r.get("latency_s", "—")
+        wer_str = format_wer(r.get("wer"))
+        production = "✓" if r.get("production_ready") else "✗"
         color = color_map.get(engine_type, "#bdc3c7")
         hover = (
             f"<b>{engine_name}</b><br>"
             f"Voice: {voice}<br>"
             f"UTMOS: {utmos:.3f}<br>"
             f"RTF: {rtf:.3f}x<br>"
+            f"WER: {wer_str}<br>"
             f"Latency: {latency}s<br>"
+            f"Cost: {format_cost(r.get('engine_cost_usd', 0), r.get('chirp_equiv_usd', 0), engine_name)}<br>"
             f"Production: {production}"
         )
         if engine_type not in traces:
             traces[engine_type] = {
                 "x": [], "y": [], "sizes": [],
         traces[engine_type]["x"].append(rtf)
         traces[engine_type]["y"].append(utmos)
+        cost = r.get("engine_cost_usd", 0) or 0
+        size = 20 + min(cost * 2000, 25)
         traces[engine_type]["sizes"].append(size)
         traces[engine_type]["hovers"].append(hover)
+        traces[engine_type]["labels"].append(f"{engine_name}<br>({voice})")
     fig = go.Figure()
             hoverinfo="text",
         ))
     fig.add_vline(
         x=1.0, line_dash="dash", line_color="rgba(255,255,255,0.4)", opacity=0.8,
         annotation_text="RTF = 1.0",
         annotation_position="right",
     )
     fig.add_annotation(
         x=0.1, y=4.9,
         text="✓ Ideal zone<br>(fast + high quality)",
 # ── event handlers ────────────────────────────────────────────────────────────
def on_row_select(evt: gr.SelectData) -> tuple:
    """
    Handle a click on a comparison-table row.

    Looks the clicked row up in the module-level session state and returns
    two component updates, in this order: one for the audio player and one
    for the Markdown detail card.

    The audio URL is read from ``_session_audio_urls`` first. When that list
    has no usable entry for the row, the handler falls back to the
    ``audio_url`` value stored on the result dict itself.
    NOTE(review): the fallback matters for runs made in the current session,
    where the list entry is appended before the upload finishes and the
    upload callback appears to write the final URL onto the result dict
    only -- confirm against run_synthesis.

    Any failure is logged and both components are hidden.
    """
    def _playable(candidate) -> bool:
        # Only http(s) strings can be handed to the audio player; this also
        # rejects None, "" and NaN values coming from a CSV-loaded history.
        return isinstance(candidate, str) and candidate.startswith("http")

    try:
        row_idx = evt.index[0]

        result = _session_results[row_idx] if row_idx < len(_session_results) else None
        url = _session_audio_urls[row_idx] if row_idx < len(_session_audio_urls) else None

        # The per-row URL list may still hold its empty placeholder; the
        # result dict is the other place a URL can live.
        if not _playable(url) and result:
            url = result.get("audio_url")

        if result:
            # .get() throughout so a row missing a column still renders a
            # card instead of raising and hiding everything.
            engine = result.get("engine", "—")
            cost = format_cost(
                result.get("engine_cost_usd", 0),
                result.get("chirp_equiv_usd", 0),
                engine,
            )
            detail = (
                f"**Engine:** {engine}  |  "
                f"**Band:** {result.get('band', '—')}  |  "
                f"**Voice:** {result.get('voice', '—')}\n\n"
                f"**UTMOS:** {format_utmos(result.get('utmos'))}  |  "
                f"**WER:** {format_wer(result.get('wer'))}  |  "
                f"**RTF:** {format_rtf(result.get('rtf'))}  |  "
                f"**Latency:** {result.get('latency_s', '—')}s  |  "
                f"**Cost:** {cost}\n\n"
                f"**Text:** {result.get('input_text', '—')}"
            )
        else:
            detail = ""

        if _playable(url):
            return gr.update(value=url, visible=True), gr.update(value=detail, visible=True)
        # No audio for this row: hide the player, show the card only if it has content.
        return gr.update(visible=False), gr.update(value=detail, visible=bool(detail))
    except Exception as e:
        # UI event boundary: log and hide both components rather than surface a traceback.
        print(f"[Playback] Row select failed: {e}")
        return gr.update(visible=False), gr.update(visible=False)
 def on_engine_change(engine_name: str):
     """Show voice dropdown only for Kokoro."""
         if url:
             eval_result["audio_url"] = url
             print(f"[Storage] Uploaded: {url}")
+            # update the CSV row with the real audio URL
+            try:
+                import pandas as pd
+                if os.path.exists(_EVAL_LOG_PATH):
+                    df = pd.read_csv(_EVAL_LOG_PATH, dtype={"audio_url": str})
+                    if "audio_url" not in df.columns:
+                        df["audio_url"] = ""
+                    # match by timestamp + engine + band — unique enough
+                    mask = (
+                        (df["timestamp"] == eval_result["timestamp"]) &
+                        (df["engine"] == eval_result["engine"]) &
+                        (df["band"] == eval_result["band"])
+                    )
+                    df.loc[mask, "audio_url"] = url
+                    df.to_csv(_EVAL_LOG_PATH, index=False)
+                    # re-upload updated CSV to Supabase
+                    from storage import upload_csv_background
+                    upload_csv_background(_EVAL_LOG_PATH)
+            except Exception as e:
+                print(f"[Storage] CSV audio_url update failed: {e}")
         else:
             eval_result["audio_url"] = None
     upload_audio_background(audio_path, bucket_filename, callback=_on_upload)
     eval_result["audio_url"] = None  # placeholder until upload completes
     _session_results.append(eval_result)
+    _session_audio_urls.append(eval_result.get("audio_url") or "")
     status = (
         f"✓ Done — "
 def clear_results():
     """Empty the session state and return a rebuilt table, a rebuilt chart, and a status message."""
     # clear() empties each module-level list in place.
     for store in (_session_results, _session_audio_urls):
         store.clear()
     table = build_comparison_table(_session_results)
     chart = build_business_chart(_session_results)
     return table, chart, "Results cleared."
     return gr.update(value=_EVAL_LOG_PATH, visible=True), "✓ Full history log ready to download."
 def load_history():
+    global _session_results, _session_audio_urls
     # try Supabase first, fall back to local CSV
     try:
         from storage import download_csv
     if not os.path.exists(_EVAL_LOG_PATH):
         return build_comparison_table([]), build_business_chart([]), "⚠ No history found."
     try:
+        df = pd.read_csv(_EVAL_LOG_PATH, dtype={"audio_url": str})
         if "audio_url" not in df.columns:
             df["audio_url"] = ""
         records = df.to_dict(orient="records")
+        # populate session state so row click works
+        _session_results = records
+        _session_audio_urls = [
+            str(r.get("audio_url", "")) if str(r.get("audio_url", "")) not in ("nan", "None", "") else ""
+            for r in records
+        ]
         return build_comparison_table(records), build_business_chart(records), f"✓ Loaded {len(records)} historical runs."
     except Exception as e:
         return build_comparison_table([]), build_business_chart([]), f"✗ Failed: {e}"
             interactive=False,
         )
+        with gr.Row():
+            with gr.Column(scale=1):
+                row_audio_player = gr.Audio(
+                    label="▶ Selected Row Audio",
+                    visible=False,
+                    type="filepath",
+                )
+            with gr.Column(scale=2):
+                row_detail = gr.Markdown(
+                    value="",
+                    visible=False,
+                )
         business_chart = gr.Plot(
             value=build_business_chart([]),
         )
         comparison_table.select(
             fn=on_row_select,
+            inputs=[],
+            outputs=[row_audio_player, row_detail],
         )
         load_history_btn.click(