Marcel0123 committed on
Commit
eadd348
·
verified ·
1 Parent(s): 3349dc8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -13
app.py CHANGED
@@ -69,11 +69,15 @@ def diagnostics_text() -> str:
69
  lines.append("- *(none found next to app.py)*")
70
 
71
  lines.append("")
72
- lines.append("**Microphone note:** If recording doesn’t work, it’s usually browser permissions / corporate policy.")
73
  lines.append("Try opening the Space in a new tab and allow microphone access.")
74
  return "\n".join(lines)
75
 
76
 
 
 
 
 
77
  # =========================================================
78
  # Features
79
  # =========================================================
@@ -253,6 +257,9 @@ def plot_pitch(art: Dict[str, Any]) -> plt.Figure:
253
  return fig
254
 
255
 
 
 
 
256
  def features_table(feats: Features) -> List[List[str]]:
257
  def f3(x):
258
  return "—" if (x is None or not math.isfinite(x)) else f"{float(x):.3f}"
@@ -278,11 +285,91 @@ def explain_single(feats: Features) -> str:
278
  )
279
 
280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  def explain_timeline() -> str:
282
  return (
283
  "### Timeline principle\n"
284
- "- Upload/select **multiple recordings of the same person**.\n"
285
  "- The key is **within-person change over time** relative to baseline.\n"
 
286
  )
287
 
288
 
@@ -302,17 +389,26 @@ def analyze_many_paths(paths: List[str]):
302
  return (
303
  [[1, "—", "Upload/select at least 2 recordings.", "", "", "", "", ""]],
304
  None,
305
- "### Upload/select at least 2 recordings."
 
306
  )
307
 
308
  rows = []
309
  pause_series, pitch_series, rms_series = [], [], []
310
 
 
 
 
 
311
  for idx, path in enumerate(paths, start=1):
312
  name = os.path.basename(path)
313
  y, sr = load_audio_file(path)
314
  feats, _ = compute_features(y, sr)
315
 
 
 
 
 
316
  pause_s = feats.pause_total_s if math.isfinite(feats.pause_total_s) else np.nan
317
  pitch_hz = feats.pitch_median_hz if math.isfinite(feats.pitch_median_hz) else np.nan
318
  rms_m = feats.rms_mean if math.isfinite(feats.rms_mean) else np.nan
@@ -345,7 +441,11 @@ def analyze_many_paths(paths: List[str]):
345
  ax.legend(loc="best")
346
  fig.tight_layout()
347
 
348
- return rows, fig, explain_timeline()
 
 
 
 
349
 
350
 
351
  def analyze_many_uploaded(files):
@@ -366,7 +466,6 @@ def analyze_many_bundled(selected_filenames: List[str]):
366
 
367
  def refresh_bundled():
368
  bundled = list_bundled_audio()
369
- # Return updated choices and refreshed diagnostics text
370
  return gr.update(choices=bundled, value=[]), diagnostics_text()
371
 
372
 
@@ -400,6 +499,21 @@ CSS = """
400
  box-shadow: var(--shadow);
401
  }
402
  .card *{ color: #0b0f19 !important; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  """
404
 
405
  def build_ui():
@@ -448,6 +562,9 @@ def build_ui():
448
  with gr.Row():
449
  refresh_btn = gr.Button("Refresh list", variant="secondary")
450
  run_b = gr.Button("Analyze selected bundled", variant="secondary")
 
 
 
451
  with gr.Column(scale=7):
452
  timeline_df = gr.Dataframe(
453
  headers=["#", "File", "Duration", "Pauses", "Pause(s)", "Pitch(Hz)", "RMS", "Active %"],
@@ -455,21 +572,18 @@ def build_ui():
455
  wrap=True,
456
  )
457
  timeline_plot = gr.Plot(label="Trend plot")
458
- timeline_expl = gr.Markdown("### Upload/select at least 2 recordings.", elem_classes=["card"])
459
-
460
- run_up.click(analyze_many_uploaded, inputs=[files], outputs=[timeline_df, timeline_plot, timeline_expl])
461
- run_b.click(analyze_many_bundled, inputs=[bundled_select], outputs=[timeline_df, timeline_plot, timeline_expl])
462
 
463
- # Refresh both bundled choices AND diagnostics output
464
- # (diagnostics component is defined in Diagnostics tab below; we reference it by variable)
465
- # We'll bind refresh after diag is created.
466
 
467
  with gr.TabItem("Diagnostics"):
468
  diag = gr.Markdown(diagnostics_text(), elem_classes=["card"])
469
  diag_refresh = gr.Button("Refresh diagnostics", variant="secondary")
470
  diag_refresh.click(lambda: diagnostics_text(), inputs=None, outputs=[diag])
471
 
472
- # Bind refresh button now that diag exists
473
  refresh_btn.click(refresh_bundled, inputs=None, outputs=[bundled_select, diag])
474
 
475
  return demo
 
69
  lines.append("- *(none found next to app.py)*")
70
 
71
  lines.append("")
72
+ lines.append("**Microphone note:** recording can be blocked by browser permissions / corporate policy.")
73
  lines.append("Try opening the Space in a new tab and allow microphone access.")
74
  return "\n".join(lines)
75
 
76
 
77
def _finite(x: object) -> bool:
    """Return True when *x* is a real number holding a finite value.

    Accepts Python ``int``/``float`` and NumPy integer/floating scalars.
    Anything else (``None``, strings, arrays, ...) is reported as not finite
    instead of raising, so callers can use this as a guard before doing
    arithmetic on feature values that may be missing.
    """
    # isinstance() already rejects None, so no separate None check is needed.
    # NumPy integer scalars are accepted next to NumPy floats so that
    # integer-valued features are not mistaken for missing values.
    if not isinstance(x, (int, float, np.integer, np.floating)):
        return False
    return math.isfinite(float(x))
79
+
80
+
81
  # =========================================================
82
  # Features
83
  # =========================================================
 
257
  return fig
258
 
259
 
260
+ # =========================================================
261
+ # Tables + Explanations
262
+ # =========================================================
263
  def features_table(feats: Features) -> List[List[str]]:
264
  def f3(x):
265
  return "—" if (x is None or not math.isfinite(x)) else f"{float(x):.3f}"
 
285
  )
286
 
287
 
288
def interpret_delta(label: str, delta: float) -> str:
    """Return one Markdown bullet giving a cautious reading of a feature change.

    Very conservative, explainable interpretation. No clinical claims.

    Args:
        label: Human-readable feature name. It is matched case-insensitively
            against a few keywords ("pause", "pitch", "rms"/"energy",
            "active speech") to choose the wording.
        delta: Signed change of the feature. Only its sign is interpreted.

    Returns:
        A single ``"- **label** ..."`` line. Non-finite deltas yield a
        "not available" line; labels matching no keyword yield the raw
        signed value.
    """
    if not _finite(delta):
        return f"- **{label}**: not available."

    key = label.lower()

    # (keywords, explanation when delta > 0, explanation when delta < 0).
    # The first matching rule wins, so the order below is significant.
    rules = (
        (
            ("pause",),
            "This can reflect slower speech, more hesitations, fatigue, distraction, or noise/environment changes.",
            "This can reflect more continuous speech or fewer hesitations.",
        ),
        (
            ("pitch",),
            "This can reflect different speaking style, emotion, or prosody changes.",
            "This can reflect a flatter/less variable prosody or a different speaking style.",
        ),
        (
            # Background noise adds signal energy, so a higher RMS points to a
            # noisier (not quieter) environment, and a lower RMS to a quieter one.
            ("rms", "energy"),
            "This can reflect speaking louder/closer to mic, or a noisier environment.",
            "This can reflect speaking softer/farther from mic, or a quieter environment.",
        ),
        (
            ("active speech",),
            "More time above the energy threshold (more continuous speech or less silence).",
            "More time below threshold (more silence/pauses).",
        ),
    )

    # Use direction-only interpretations.
    for keywords, up_text, down_text in rules:
        if any(word in key for word in keywords):
            if delta > 0:
                return f"- **{label}** increased. {up_text}"
            if delta < 0:
                return f"- **{label}** decreased. {down_text}"
            return f"- **{label}** stayed similar."

    return f"- **{label}** changed by {delta:+.3f}."
320
+
321
+
322
def summary_of_changes(first: Features, last: Features) -> str:
    """Compare the first and last recording of a timeline as Markdown.

    Produces the measured differences (last minus first) for total pause
    time, number of pauses, median pitch, RMS energy and active speech ratio,
    followed by one cautious, non-clinical interpretation per feature.

    Args:
        first: Features of the baseline (first) recording.
        last: Features of the comparison (last) recording.

    Returns:
        A Markdown string. Values that cannot be computed are shown as "—".
    """

    def delta(attr: str) -> float:
        """Return ``last.attr - first.attr``, or NaN if either side is not finite."""
        before, after = getattr(first, attr), getattr(last, attr)
        if _finite(before) and _finite(after):
            return after - before
        return float("nan")

    def fmt(x, unit=""):
        """Format a signed delta; "%" scales a ratio delta by 100."""
        if not _finite(x):
            return "—"
        if unit == "%":
            return f"{x*100:+.1f}%"
        return f"{x:+.3f}{unit}"

    # compute deltas (last - first)
    d_pause_total = delta("pause_total_s")
    d_pitch = delta("pitch_median_hz")
    d_rms = delta("rms_mean")
    d_active = delta("active_ratio")

    # Pause counts may be builtin ints or NumPy integer scalars. Convert to a
    # builtin int so the delta is printed with "+d" instead of being treated
    # as a missing value.
    n_first, n_last = first.n_pauses, last.n_pauses
    integral = (int, np.integer)
    if isinstance(n_first, integral) and isinstance(n_last, integral):
        d_n_pauses = int(n_last) - int(n_first)
    elif _finite(n_first) and _finite(n_last):
        d_n_pauses = float(n_last) - float(n_first)
    else:
        d_n_pauses = float("nan")

    if isinstance(d_n_pauses, int):
        pauses_line = f"- Number of pauses: **{d_n_pauses:+d}**"
    else:
        pauses_line = f"- Number of pauses: **{fmt(d_n_pauses)}**"

    lines = [
        "### Summary of changes (last vs first)",
        "This compares the **first** and **last** recording you provided (chronological order recommended).",
        "",
        "**Measured differences (Δ = last − first):**",
        f"- Total pause time: **{fmt(d_pause_total, 's')}**",
        pauses_line,
        f"- Median pitch: **{fmt(d_pitch, ' Hz')}**",
        f"- RMS energy: **{fmt(d_rms)}**",
        f"- Active speech ratio: **{fmt(d_active, '%')}**",
        "",
        "**Possible (non-clinical) interpretations:**",
        interpret_delta("Total pause time", d_pause_total),
        interpret_delta("Number of pauses", float(d_n_pauses)),
        interpret_delta("Median pitch", d_pitch),
        interpret_delta("RMS energy", d_rms),
        interpret_delta("Active speech ratio", d_active),
        "",
        (
            "**Important:** these are **speech-signal explanations**, not a diagnosis. "
            "Real-world meaning depends on context (device, environment, fatigue, stress, medication, etc.)."
        ),
    ]
    return "\n".join(lines)
365
+
366
+
367
def explain_timeline() -> str:
    """Return the static Markdown text describing the timeline principle."""
    heading = "### Timeline principle"
    bullets = (
        "- Use **multiple recordings of the same person**.",
        "- The key is **within-person change over time** relative to baseline.",
        "- The Summary box explains **what changed** (signals) and gives cautious, non-clinical interpretations.",
    )
    # Every line, including the last bullet, ends with a newline.
    return "\n".join((heading, *bullets)) + "\n"
374
 
375
 
 
389
  return (
390
  [[1, "—", "Upload/select at least 2 recordings.", "", "", "", "", ""]],
391
  None,
392
+ explain_timeline(),
393
+ "### Upload/select at least 2 recordings to generate a summary."
394
  )
395
 
396
  rows = []
397
  pause_series, pitch_series, rms_series = [], [], []
398
 
399
+ # store first/last features for summary
400
+ feats_first = None
401
+ feats_last = None
402
+
403
  for idx, path in enumerate(paths, start=1):
404
  name = os.path.basename(path)
405
  y, sr = load_audio_file(path)
406
  feats, _ = compute_features(y, sr)
407
 
408
+ if idx == 1:
409
+ feats_first = feats
410
+ feats_last = feats
411
+
412
  pause_s = feats.pause_total_s if math.isfinite(feats.pause_total_s) else np.nan
413
  pitch_hz = feats.pitch_median_hz if math.isfinite(feats.pitch_median_hz) else np.nan
414
  rms_m = feats.rms_mean if math.isfinite(feats.rms_mean) else np.nan
 
441
  ax.legend(loc="best")
442
  fig.tight_layout()
443
 
444
+ summary = "### Summary not available."
445
+ if feats_first is not None and feats_last is not None:
446
+ summary = summary_of_changes(feats_first, feats_last)
447
+
448
+ return rows, fig, explain_timeline(), summary
449
 
450
 
451
  def analyze_many_uploaded(files):
 
466
 
467
def refresh_bundled():
    """Build the outputs for the "Refresh list" button.

    Returns:
        A 2-tuple: a ``gr.update`` that replaces the selector's choices with
        the result of ``list_bundled_audio()`` and clears the current
        selection, and the text returned by ``diagnostics_text()``.
    """
    selector_update = gr.update(choices=list_bundled_audio(), value=[])
    return selector_update, diagnostics_text()
470
 
471
 
 
499
  box-shadow: var(--shadow);
500
  }
501
  .card *{ color: #0b0f19 !important; }
502
+
503
+ /* Tabs: make readable on dark background */
504
+ div[role="tablist"]{
505
+ background: rgba(255,255,255,0.06) !important;
506
+ border: 1px solid rgba(255,255,255,0.14) !important;
507
+ border-radius: 14px !important;
508
+ padding: 6px !important;
509
+ }
510
+ button[role="tab"]{
511
+ color: rgba(255,255,255,0.92) !important;
512
+ }
513
+ button[role="tab"][aria-selected="true"]{
514
+ color: rgba(255,255,255,0.98) !important;
515
+ border-bottom: 2px solid rgba(255,255,255,0.65) !important;
516
+ }
517
  """
518
 
519
  def build_ui():
 
562
  with gr.Row():
563
  refresh_btn = gr.Button("Refresh list", variant="secondary")
564
  run_b = gr.Button("Analyze selected bundled", variant="secondary")
565
+
566
+ gr.Markdown("Order matters: first = baseline, last = comparison.", elem_classes=["card"])
567
+
568
  with gr.Column(scale=7):
569
  timeline_df = gr.Dataframe(
570
  headers=["#", "File", "Duration", "Pauses", "Pause(s)", "Pitch(Hz)", "RMS", "Active %"],
 
572
  wrap=True,
573
  )
574
  timeline_plot = gr.Plot(label="Trend plot")
575
+ timeline_expl = gr.Markdown(explain_timeline(), elem_classes=["card"])
576
+ timeline_summary = gr.Markdown("### Summary will appear here after analysis.", elem_classes=["card"])
 
 
577
 
578
+ run_up.click(analyze_many_uploaded, inputs=[files], outputs=[timeline_df, timeline_plot, timeline_expl, timeline_summary])
579
+ run_b.click(analyze_many_bundled, inputs=[bundled_select], outputs=[timeline_df, timeline_plot, timeline_expl, timeline_summary])
 
580
 
581
  with gr.TabItem("Diagnostics"):
582
  diag = gr.Markdown(diagnostics_text(), elem_classes=["card"])
583
  diag_refresh = gr.Button("Refresh diagnostics", variant="secondary")
584
  diag_refresh.click(lambda: diagnostics_text(), inputs=None, outputs=[diag])
585
 
586
+ # Refresh bundled choices AND diagnostics
587
  refresh_btn.click(refresh_bundled, inputs=None, outputs=[bundled_select, diag])
588
 
589
  return demo