aankitdas committed on
Commit
78dff9d
·
1 Parent(s): 29ebcc1

added supabase stuff - now enjoy history

Browse files
Files changed (1) hide show
  1. app/app.py +119 -79
app/app.py CHANGED
@@ -46,7 +46,7 @@ KOKORO_BAND_VOICE = {
46
  # ── state ─────────────────────────────────────────────────────────────────────
47
 
48
  _session_results: list[dict] = []
49
-
50
  # ── helpers ───────────────────────────────────────────────────────────────────
51
 
52
  def format_wer(wer):
@@ -77,15 +77,13 @@ def format_cost(engine_cost, chirp_cost, engine_name=""):
77
  def build_comparison_table(results: list[dict]) -> pd.DataFrame:
78
  columns = [
79
  "Engine",
80
- "Type",
81
- "Production",
82
  "Voice",
83
- "UTMOS ↑ (1-5, naturalness)",
84
- "WER ↓ (% words wrong, intelligibility)",
85
- "RTF ↓ (synth time / audio dur, <1.0 = fast)",
86
  "Latency (s)",
87
  "Cost",
88
- "Audio URL"
89
  ]
90
  if not results:
91
  return pd.DataFrame(columns=columns)
@@ -93,16 +91,14 @@ def build_comparison_table(results: list[dict]) -> pd.DataFrame:
93
  rows = []
94
  for r in results:
95
  rows.append({
96
- "Engine": r["engine"],
97
- "Type": r["engine_type"],
98
- "Production": "βœ“" if r["production_ready"] else "βœ— baseline",
99
- "Voice": r.get("voice", "β€”"),
100
- "UTMOS ↑ (1-5, naturalness)": format_utmos(r["utmos"]),
101
- "WER ↓ (% words wrong, intelligibility)": format_wer(r["wer"]),
102
- "RTF ↓ (synth time / audio dur, <1.0 = fast)": format_rtf(r["rtf"]),
103
- "Latency (s)": r["latency_s"],
104
- "Cost": format_cost(r["engine_cost_usd"], r["chirp_equiv_usd"], r["engine"]),
105
- "Audio URL": r.get("audio_url") or ""
106
  })
107
  return pd.DataFrame(rows)
108
 
@@ -112,8 +108,9 @@ def build_business_chart(results: list[dict]):
112
  Bubble chart for business decision making.
113
  X = RTF (speed, lower = better)
114
  Y = UTMOS (quality, higher = better)
115
- Bubble size = cost (larger = more expensive)
116
  Color = engine type
 
117
  """
118
  import plotly.graph_objects as go
119
 
@@ -125,7 +122,6 @@ def build_business_chart(results: list[dict]):
125
  )
126
  return fig
127
 
128
- # parse RTF float from formatted string
129
  def parse_rtf(rtf_str):
130
  if rtf_str is None or rtf_str == "N/A":
131
  return None
@@ -134,71 +130,49 @@ def build_business_chart(results: list[dict]):
134
  except Exception:
135
  return None
136
 
137
- # parse UTMOS float from formatted string
138
  def parse_utmos(utmos_str):
139
- if utmos_str is None or utmos_str == "N/A (mp3)":
140
  return None
141
  try:
142
  return float(str(utmos_str).split(" ")[0])
143
  except Exception:
144
  return None
145
 
146
- # parse cost float from formatted string
147
- def parse_cost(cost_str):
148
- if not cost_str:
149
- return 0.0
150
- try:
151
- # extract first dollar amount
152
- import re
153
- match = re.search(r"\$([\d.]+)", str(cost_str))
154
- return float(match.group(1)) if match else 0.0
155
- except Exception:
156
- return 0.0
157
-
158
  color_map = {
159
- "neural-local": "#2ecc71", # green
160
- "neural-cloud-free": "#3498db", # blue
161
- "neural-cloud-paid": "#e74c3c", # red
162
- "rule-based-local": "#95a5a6", # gray
163
  }
164
 
165
- # build table from results
166
- df = build_comparison_table(results)
167
-
168
  traces = {}
169
- for _, row in df.iterrows():
170
- rtf = parse_rtf(row["RTF ↓ (synth time / audio dur, <1.0 = fast)"])
171
- utmos = parse_utmos(row["UTMOS ↑ (1-5, naturalness)"])
172
- cost = parse_cost(row["Cost"])
173
 
174
  if rtf is None or utmos is None:
175
  continue
176
 
177
- engine_name = row["Engine"]
178
- engine_type = row["Type"]
179
- voice = row["Voice"]
180
- latency = row["Latency (s)"]
181
- wer = row["WER ↓ (% words wrong, intelligibility)"]
182
- production = row["Production"]
183
-
184
  color = color_map.get(engine_type, "#bdc3c7")
185
 
186
- # bubble size: min size 15, scale with cost
187
- size = 20 #max(15, cost * 5000 + 15)
188
-
189
  hover = (
190
  f"<b>{engine_name}</b><br>"
191
  f"Voice: {voice}<br>"
192
  f"UTMOS: {utmos:.3f}<br>"
193
  f"RTF: {rtf:.3f}x<br>"
194
- f"WER: {wer}<br>"
195
  f"Latency: {latency}s<br>"
196
- f"Cost: {row['Cost']}<br>"
197
  f"Production: {production}"
198
  )
199
 
200
- label = f"{engine_name}<br>({voice})"
201
-
202
  if engine_type not in traces:
203
  traces[engine_type] = {
204
  "x": [], "y": [], "sizes": [],
@@ -208,9 +182,11 @@ def build_business_chart(results: list[dict]):
208
 
209
  traces[engine_type]["x"].append(rtf)
210
  traces[engine_type]["y"].append(utmos)
 
 
211
  traces[engine_type]["sizes"].append(size)
212
  traces[engine_type]["hovers"].append(hover)
213
- traces[engine_type]["labels"].append(label)
214
 
215
  fig = go.Figure()
216
 
@@ -231,7 +207,6 @@ def build_business_chart(results: list[dict]):
231
  hoverinfo="text",
232
  ))
233
 
234
- # add quadrant lines at RTF=1.0 and UTMOS=4.0
235
  fig.add_vline(
236
  x=1.0, line_dash="dash", line_color="rgba(255,255,255,0.4)", opacity=0.8,
237
  annotation_text="RTF = 1.0",
@@ -245,7 +220,6 @@ def build_business_chart(results: list[dict]):
245
  annotation_position="right",
246
  )
247
 
248
- # annotate ideal quadrant
249
  fig.add_annotation(
250
  x=0.1, y=4.9,
251
  text="βœ“ Ideal zone<br>(fast + high quality)",
@@ -307,20 +281,48 @@ def _make_audio_filename(engine_name: str, band: str, ext: str) -> str:
307
 
308
  # ── event handlers ────────────────────────────────────────────────────────────
309
 
310
- def on_row_select(evt: gr.SelectData, results_df: pd.DataFrame) -> tuple:
311
  """
312
- When a row is selected, pass the Supabase public URL directly
313
- to gr.Audio value — Gradio fetches it internally.
 
314
  """
315
  try:
316
  row_idx = evt.index[0]
317
- url = results_df.iloc[row_idx]["Audio URL"]
318
- if not url or not str(url).startswith("http"):
319
- return gr.update(visible=False)
320
- return gr.update(value=url, visible=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321
  except Exception as e:
322
- print(f"[Playback] Failed to load audio: {e}")
323
- return gr.update(visible=False)
324
 
325
  def on_engine_change(engine_name: str):
326
  """Show voice dropdown only for Kokoro."""
@@ -389,12 +391,33 @@ def run_synthesis(engine_name: str, band: str, text: str, voice: str):
389
  if url:
390
  eval_result["audio_url"] = url
391
  print(f"[Storage] Uploaded: {url}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392
  else:
393
  eval_result["audio_url"] = None
394
 
395
  upload_audio_background(audio_path, bucket_filename, callback=_on_upload)
396
  eval_result["audio_url"] = None # placeholder until upload completes
397
  _session_results.append(eval_result)
 
398
 
399
  status = (
400
  f"βœ“ Done β€” "
@@ -407,6 +430,7 @@ def run_synthesis(engine_name: str, band: str, text: str, voice: str):
407
 
408
  def clear_results():
409
  _session_results.clear()
 
410
  return build_comparison_table(_session_results), build_business_chart(_session_results), "Results cleared."
411
 
412
 
@@ -425,6 +449,8 @@ def export_all():
425
  return gr.update(value=_EVAL_LOG_PATH, visible=True), "βœ“ Full history log ready to download."
426
 
427
  def load_history():
 
 
428
  # try Supabase first, fall back to local CSV
429
  try:
430
  from storage import download_csv
@@ -435,11 +461,18 @@ def load_history():
435
  if not os.path.exists(_EVAL_LOG_PATH):
436
  return build_comparison_table([]), build_business_chart([]), "⚠ No history found."
437
  try:
438
- df = pd.read_csv(_EVAL_LOG_PATH)
439
- # fill missing audio_url column for old rows that predate storage
440
  if "audio_url" not in df.columns:
441
  df["audio_url"] = ""
442
  records = df.to_dict(orient="records")
 
 
 
 
 
 
 
 
443
  return build_comparison_table(records), build_business_chart(records), f"βœ“ Loaded {len(records)} historical runs."
444
  except Exception as e:
445
  return build_comparison_table([]), build_business_chart([]), f"βœ— Failed: {e}"
@@ -502,11 +535,18 @@ def build_ui():
502
  interactive=False,
503
  )
504
 
505
- row_audio_player = gr.Audio(
506
- label="Selected Row Audio",
507
- visible=False,
508
- type="filepath",
509
- )
 
 
 
 
 
 
 
510
 
511
  business_chart = gr.Plot(
512
  value=build_business_chart([]),
@@ -554,8 +594,8 @@ def build_ui():
554
  )
555
  comparison_table.select(
556
  fn=on_row_select,
557
- inputs=[comparison_table],
558
- outputs=[row_audio_player],
559
  )
560
 
561
  load_history_btn.click(
 
46
  # ── state ─────────────────────────────────────────────────────────────────────
47
 
48
  _session_results: list[dict] = []
49
+ _session_audio_urls: list[str] = []
50
  # ── helpers ───────────────────────────────────────────────────────────────────
51
 
52
  def format_wer(wer):
 
77
  def build_comparison_table(results: list[dict]) -> pd.DataFrame:
78
  columns = [
79
  "Engine",
80
+ "Band",
 
81
  "Voice",
82
+ "UTMOS ↑",
83
+ "WER ↓",
84
+ "RTF ↓",
85
  "Latency (s)",
86
  "Cost",
 
87
  ]
88
  if not results:
89
  return pd.DataFrame(columns=columns)
 
91
  rows = []
92
  for r in results:
93
  rows.append({
94
+ "Engine": r["engine"],
95
+ "Band": r["band"],
96
+ "Voice": r.get("voice", "β€”"),
97
+ "UTMOS ↑": format_utmos(r["utmos"]),
98
+ "WER ↓": format_wer(r["wer"]),
99
+ "RTF ↓": format_rtf(r["rtf"]),
100
+ "Latency (s)": r["latency_s"],
101
+ "Cost": format_cost(r["engine_cost_usd"], r["chirp_equiv_usd"], r["engine"]),
 
 
102
  })
103
  return pd.DataFrame(rows)
104
 
 
108
  Bubble chart for business decision making.
109
  X = RTF (speed, lower = better)
110
  Y = UTMOS (quality, higher = better)
111
+ Bubble size = fixed (cost removed from visual)
112
  Color = engine type
113
+ Reads directly from results dicts — no dependency on display column names.
114
  """
115
  import plotly.graph_objects as go
116
 
 
122
  )
123
  return fig
124
 
 
125
  def parse_rtf(rtf_str):
126
  if rtf_str is None or rtf_str == "N/A":
127
  return None
 
130
  except Exception:
131
  return None
132
 
 
133
  def parse_utmos(utmos_str):
134
+ if utmos_str is None or utmos_str == "N/A":
135
  return None
136
  try:
137
  return float(str(utmos_str).split(" ")[0])
138
  except Exception:
139
  return None
140
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  color_map = {
142
+ "neural-local": "#2ecc71",
143
+ "neural-cloud-free": "#3498db",
144
+ "neural-cloud-paid": "#e74c3c",
145
+ "rule-based-local": "#95a5a6",
146
  }
147
 
 
 
 
148
  traces = {}
149
+
150
+ for r in results:
151
+ rtf = parse_rtf(format_rtf(r.get("rtf")))
152
+ utmos = parse_utmos(format_utmos(r.get("utmos")))
153
 
154
  if rtf is None or utmos is None:
155
  continue
156
 
157
+ engine_name = r["engine"]
158
+ engine_type = r.get("engine_type", "neural-local")
159
+ voice = r.get("voice", "β€”")
160
+ latency = r.get("latency_s", "β€”")
161
+ wer_str = format_wer(r.get("wer"))
162
+ production = "βœ“" if r.get("production_ready") else "βœ—"
 
163
  color = color_map.get(engine_type, "#bdc3c7")
164
 
 
 
 
165
  hover = (
166
  f"<b>{engine_name}</b><br>"
167
  f"Voice: {voice}<br>"
168
  f"UTMOS: {utmos:.3f}<br>"
169
  f"RTF: {rtf:.3f}x<br>"
170
+ f"WER: {wer_str}<br>"
171
  f"Latency: {latency}s<br>"
172
+ f"Cost: {format_cost(r.get('engine_cost_usd', 0), r.get('chirp_equiv_usd', 0), engine_name)}<br>"
173
  f"Production: {production}"
174
  )
175
 
 
 
176
  if engine_type not in traces:
177
  traces[engine_type] = {
178
  "x": [], "y": [], "sizes": [],
 
182
 
183
  traces[engine_type]["x"].append(rtf)
184
  traces[engine_type]["y"].append(utmos)
185
+ cost = r.get("engine_cost_usd", 0) or 0
186
+ size = 20 + min(cost * 2000, 25)
187
  traces[engine_type]["sizes"].append(size)
188
  traces[engine_type]["hovers"].append(hover)
189
+ traces[engine_type]["labels"].append(f"{engine_name}<br>({voice})")
190
 
191
  fig = go.Figure()
192
 
 
207
  hoverinfo="text",
208
  ))
209
 
 
210
  fig.add_vline(
211
  x=1.0, line_dash="dash", line_color="rgba(255,255,255,0.4)", opacity=0.8,
212
  annotation_text="RTF = 1.0",
 
220
  annotation_position="right",
221
  )
222
 
 
223
  fig.add_annotation(
224
  x=0.1, y=4.9,
225
  text="βœ“ Ideal zone<br>(fast + high quality)",
 
281
 
282
  # ── event handlers ────────────────────────────────────────────────────────────
283
 
284
+ def on_row_select(evt: gr.SelectData) -> tuple:
285
  """
286
+ On row click: play audio and show metrics detail card.
287
+ Uses _session_audio_urls indexed by row — URL never shown in table.
288
+ Falls back to load_history URLs if session list is shorter (history mode).
289
  """
290
  try:
291
  row_idx = evt.index[0]
292
+
293
+ # get audio url
294
+ url = None
295
+ if row_idx < len(_session_audio_urls):
296
+ url = _session_audio_urls[row_idx]
297
+
298
+ # get result for detail card
299
+ result = None
300
+ if row_idx < len(_session_results):
301
+ result = _session_results[row_idx]
302
+
303
+ # build detail markdown
304
+ if result:
305
+ detail = (
306
+ f"**Engine:** {result['engine']} | "
307
+ f"**Band:** {result['band']} | "
308
+ f"**Voice:** {result.get('voice', 'β€”')}\n\n"
309
+ f"**UTMOS:** {format_utmos(result['utmos'])} | "
310
+ f"**WER:** {format_wer(result['wer'])} | "
311
+ f"**RTF:** {format_rtf(result['rtf'])} | "
312
+ f"**Latency:** {result['latency_s']}s | "
313
+ f"**Cost:** {format_cost(result['engine_cost_usd'], result['chirp_equiv_usd'], result['engine'])}\n\n"
314
+ f"**Text:** {result.get('input_text', 'β€”')}"
315
+ )
316
+ else:
317
+ detail = ""
318
+
319
+ if url and str(url).startswith("http"):
320
+ return gr.update(value=url, visible=True), gr.update(value=detail, visible=True)
321
+ return gr.update(visible=False), gr.update(value=detail, visible=bool(detail))
322
+
323
  except Exception as e:
324
+ print(f"[Playback] Row select failed: {e}")
325
+ return gr.update(visible=False), gr.update(visible=False)
326
 
327
  def on_engine_change(engine_name: str):
328
  """Show voice dropdown only for Kokoro."""
 
391
  if url:
392
  eval_result["audio_url"] = url
393
  print(f"[Storage] Uploaded: {url}")
394
+ # update the CSV row with the real audio URL
395
+ try:
396
+ import pandas as pd
397
+ if os.path.exists(_EVAL_LOG_PATH):
398
+ df = pd.read_csv(_EVAL_LOG_PATH, dtype={"audio_url": str})
399
+ if "audio_url" not in df.columns:
400
+ df["audio_url"] = ""
401
+ # match by timestamp + engine + band — unique enough
402
+ mask = (
403
+ (df["timestamp"] == eval_result["timestamp"]) &
404
+ (df["engine"] == eval_result["engine"]) &
405
+ (df["band"] == eval_result["band"])
406
+ )
407
+ df.loc[mask, "audio_url"] = url
408
+ df.to_csv(_EVAL_LOG_PATH, index=False)
409
+ # re-upload updated CSV to Supabase
410
+ from storage import upload_csv_background
411
+ upload_csv_background(_EVAL_LOG_PATH)
412
+ except Exception as e:
413
+ print(f"[Storage] CSV audio_url update failed: {e}")
414
  else:
415
  eval_result["audio_url"] = None
416
 
417
  upload_audio_background(audio_path, bucket_filename, callback=_on_upload)
418
  eval_result["audio_url"] = None # placeholder until upload completes
419
  _session_results.append(eval_result)
420
+ _session_audio_urls.append(eval_result.get("audio_url") or "")
421
 
422
  status = (
423
  f"βœ“ Done β€” "
 
430
 
431
  def clear_results():
432
  _session_results.clear()
433
+ _session_audio_urls.clear()
434
  return build_comparison_table(_session_results), build_business_chart(_session_results), "Results cleared."
435
 
436
 
 
449
  return gr.update(value=_EVAL_LOG_PATH, visible=True), "βœ“ Full history log ready to download."
450
 
451
  def load_history():
452
+ global _session_results, _session_audio_urls
453
+
454
  # try Supabase first, fall back to local CSV
455
  try:
456
  from storage import download_csv
 
461
  if not os.path.exists(_EVAL_LOG_PATH):
462
  return build_comparison_table([]), build_business_chart([]), "⚠ No history found."
463
  try:
464
+ df = pd.read_csv(_EVAL_LOG_PATH, dtype={"audio_url": str})
 
465
  if "audio_url" not in df.columns:
466
  df["audio_url"] = ""
467
  records = df.to_dict(orient="records")
468
+
469
+ # populate session state so row click works
470
+ _session_results = records
471
+ _session_audio_urls = [
472
+ str(r.get("audio_url", "")) if str(r.get("audio_url", "")) not in ("nan", "None", "") else ""
473
+ for r in records
474
+ ]
475
+
476
  return build_comparison_table(records), build_business_chart(records), f"βœ“ Loaded {len(records)} historical runs."
477
  except Exception as e:
478
  return build_comparison_table([]), build_business_chart([]), f"βœ— Failed: {e}"
 
535
  interactive=False,
536
  )
537
 
538
+ with gr.Row():
539
+ with gr.Column(scale=1):
540
+ row_audio_player = gr.Audio(
541
+ label="β–Ά Selected Row Audio",
542
+ visible=False,
543
+ type="filepath",
544
+ )
545
+ with gr.Column(scale=2):
546
+ row_detail = gr.Markdown(
547
+ value="",
548
+ visible=False,
549
+ )
550
 
551
  business_chart = gr.Plot(
552
  value=build_business_chart([]),
 
594
  )
595
  comparison_table.select(
596
  fn=on_row_select,
597
+ inputs=[],
598
+ outputs=[row_audio_player, row_detail],
599
  )
600
 
601
  load_history_btn.click(