Spaces:

Marcel0123
/

Explainable-Speech-Analytics

Sleeping

App Files Files Community

Marcel0123 committed 20 days ago

Commit

eadd348

verified ·

1 Parent(s): 3349dc8

Update app.py

Browse files

Files changed (1) hide show

app.py +127 -13

app.py CHANGED Viewed

@@ -69,11 +69,15 @@ def diagnostics_text() -> str:
         lines.append("- *(none found next to app.py)*")
     lines.append("")
-    lines.append("**Microphone note:** If recording doesn’t work, it’s usually browser permissions / corporate policy.")
     lines.append("Try opening the Space in a new tab and allow microphone access.")
     return "\n".join(lines)
 # =========================================================
 # Features
 # =========================================================
@@ -253,6 +257,9 @@ def plot_pitch(art: Dict[str, Any]) -> plt.Figure:
     return fig
 def features_table(feats: Features) -> List[List[str]]:
     def f3(x):
         return "—" if (x is None or not math.isfinite(x)) else f"{float(x):.3f}"
@@ -278,11 +285,91 @@ def explain_single(feats: Features) -> str:
     )
 def explain_timeline() -> str:
     return (
         "### Timeline principle\n"
-        "- Upload/select **multiple recordings of the same person**.\n"
         "- The key is **within-person change over time** relative to baseline.\n"
     )
@@ -302,17 +389,26 @@ def analyze_many_paths(paths: List[str]):
         return (
             [[1, "—", "Upload/select at least 2 recordings.", "", "", "", "", ""]],
             None,
-            "### Upload/select at least 2 recordings."
         )
     rows = []
     pause_series, pitch_series, rms_series = [], [], []
     for idx, path in enumerate(paths, start=1):
         name = os.path.basename(path)
         y, sr = load_audio_file(path)
         feats, _ = compute_features(y, sr)
         pause_s = feats.pause_total_s if math.isfinite(feats.pause_total_s) else np.nan
         pitch_hz = feats.pitch_median_hz if math.isfinite(feats.pitch_median_hz) else np.nan
         rms_m = feats.rms_mean if math.isfinite(feats.rms_mean) else np.nan
@@ -345,7 +441,11 @@ def analyze_many_paths(paths: List[str]):
     ax.legend(loc="best")
     fig.tight_layout()
-    return rows, fig, explain_timeline()
 def analyze_many_uploaded(files):
@@ -366,7 +466,6 @@ def analyze_many_bundled(selected_filenames: List[str]):
 def refresh_bundled():
     bundled = list_bundled_audio()
-    # Return updated choices and refreshed diagnostics text
     return gr.update(choices=bundled, value=[]), diagnostics_text()
@@ -400,6 +499,21 @@ CSS = """
   box-shadow: var(--shadow);
 }
 .card *{ color: #0b0f19 !important; }
 """
 def build_ui():
@@ -448,6 +562,9 @@ def build_ui():
                         with gr.Row():
                             refresh_btn = gr.Button("Refresh list", variant="secondary")
                             run_b = gr.Button("Analyze selected bundled", variant="secondary")
                     with gr.Column(scale=7):
                         timeline_df = gr.Dataframe(
                             headers=["#", "File", "Duration", "Pauses", "Pause(s)", "Pitch(Hz)", "RMS", "Active %"],
@@ -455,21 +572,18 @@ def build_ui():
                             wrap=True,
                         )
                         timeline_plot = gr.Plot(label="Trend plot")
-                        timeline_expl = gr.Markdown("### Upload/select at least 2 recordings.", elem_classes=["card"])
-                run_up.click(analyze_many_uploaded, inputs=[files], outputs=[timeline_df, timeline_plot, timeline_expl])
-                run_b.click(analyze_many_bundled, inputs=[bundled_select], outputs=[timeline_df, timeline_plot, timeline_expl])
-                # Refresh both bundled choices AND diagnostics output
-                # (diagnostics component is defined in Diagnostics tab below; we reference it by variable)
-                # We'll bind refresh after diag is created.
             with gr.TabItem("Diagnostics"):
                 diag = gr.Markdown(diagnostics_text(), elem_classes=["card"])
                 diag_refresh = gr.Button("Refresh diagnostics", variant="secondary")
                 diag_refresh.click(lambda: diagnostics_text(), inputs=None, outputs=[diag])
-        # Bind refresh button now that diag exists
         refresh_btn.click(refresh_bundled, inputs=None, outputs=[bundled_select, diag])
     return demo

         lines.append("- *(none found next to app.py)*")
     lines.append("")
+    lines.append("**Microphone note:** recording can be blocked by browser permissions / corporate policy.")
     lines.append("Try opening the Space in a new tab and allow microphone access.")
     return "\n".join(lines)
+def _finite(x: float) -> bool:
+    return x is not None and isinstance(x, (int, float, np.floating)) and math.isfinite(float(x))
 # =========================================================
 # Features
 # =========================================================
     return fig
+# =========================================================
+# Tables + Explanations
+# =========================================================
 def features_table(feats: Features) -> List[List[str]]:
     def f3(x):
         return "—" if (x is None or not math.isfinite(x)) else f"{float(x):.3f}"
     )
def interpret_delta(label: str, delta: float) -> str:
    """Return one Markdown bullet giving a cautious reading of a feature change.

    Only the direction of *delta* is interpreted; no clinical claims are made.

    Args:
        label: Human-readable feature name. The wording is chosen by
            case-insensitive keyword match on this label (first match wins).
        delta: Change in the feature value (positive means it increased).

    Returns:
        A line starting with ``- **{label}**``. When ``_finite(delta)`` is
        false the line says the value is not available; when no keyword
        matches, the signed numeric change is reported instead.
    """
    if not _finite(delta):
        return f"- **{label}**: not available."

    # Lower-case once instead of once per keyword test.
    key = label.lower()

    # (keywords, text for an increase, text for a decrease).
    # Order matters: rules are tried top to bottom and the first match wins.
    rules = (
        (
            ("pause",),
            "This can reflect slower speech, more hesitations, fatigue, distraction, or noise/environment changes.",
            "This can reflect more continuous speech or fewer hesitations.",
        ),
        (
            ("pitch",),
            "This can reflect different speaking style, emotion, or prosody changes.",
            "This can reflect a flatter/less variable prosody or a different speaking style.",
        ),
        (
            ("rms", "energy"),
            "This can reflect speaking louder/closer to mic, or a quieter environment.",
            "This can reflect speaking softer/farther from mic, or a noisier environment.",
        ),
        (
            ("active speech",),
            "More time above the energy threshold (more continuous speech or less silence).",
            "More time below threshold (more silence/pauses).",
        ),
    )

    for keywords, up_text, down_text in rules:
        if not any(word in key for word in keywords):
            continue
        if delta > 0:
            return f"- **{label}** increased. {up_text}"
        if delta < 0:
            return f"- **{label}** decreased. {down_text}"
        # Finite and neither positive nor negative: exactly zero.
        return f"- **{label}** stayed similar."

    return f"- **{label}** changed by {delta:+.3f}."
def summary_of_changes(first: Features, last: Features) -> str:
    """Build a Markdown summary comparing the first and last recording.

    Every delta is computed as ``last - first``. For the float-valued features
    a side that fails ``_finite`` makes the delta NaN, which is rendered as
    "—" and interpreted as "not available".

    Args:
        first: Features of the first (baseline) recording.
        last: Features of the last (comparison) recording.

    Returns:
        Markdown text with the measured differences, one ``interpret_delta``
        bullet per feature, and a closing note that this is not a diagnosis.
    """
    import numbers  # function-scope import: only needed for the pause-count check

    nan = float("nan")

    def diff(before, after):
        """Return ``after - before``, or NaN when either side is not finite."""
        return (after - before) if (_finite(after) and _finite(before)) else nan

    d_pause_total = diff(first.pause_total_s, last.pause_total_s)
    d_pitch = diff(first.pitch_median_hz, last.pitch_median_hz)
    d_rms = diff(first.rms_mean, last.rms_mean)
    d_active = diff(first.active_ratio, last.active_ratio)

    # The pause count is only guarded against None, not passed through _finite.
    if first.n_pauses is None or last.n_pauses is None:
        d_n_pauses = nan
    else:
        d_n_pauses = last.n_pauses - first.n_pauses
        # NumPy integer scalars are not built-in ints; normalise them so the
        # "+d" formatting below applies instead of falling back to "—".
        if isinstance(d_n_pauses, numbers.Integral):
            d_n_pauses = int(d_n_pauses)

    def fmt(x, unit=""):
        """Format a signed delta; "%" scales a ratio to percent."""
        if not _finite(x):
            return "—"
        if unit == "%":
            return f"{x*100:+.1f}%"
        return f"{x:+.3f}{unit}"

    count_is_int = isinstance(d_n_pauses, int)
    pauses_text = f"{d_n_pauses:+d}" if count_is_int else fmt(d_n_pauses)

    lines = [
        "### Summary of changes (last vs first)",
        "This compares the **first** and **last** recording you provided (chronological order recommended).",
        "",
        "**Measured differences (Δ = last − first):**",
        f"- Total pause time: **{fmt(d_pause_total, 's')}**",
        f"- Number of pauses: **{pauses_text}**",
        f"- Median pitch: **{fmt(d_pitch, ' Hz')}**",
        f"- RMS energy: **{fmt(d_rms)}**",
        f"- Active speech ratio: **{fmt(d_active, '%')}**",
        "",
        "**Possible (non-clinical) interpretations:**",
        interpret_delta("Total pause time", d_pause_total),
        interpret_delta("Number of pauses", float(d_n_pauses) if count_is_int else d_n_pauses),
        interpret_delta("Median pitch", d_pitch),
        interpret_delta("RMS energy", d_rms),
        interpret_delta("Active speech ratio", d_active),
        "",
        (
            "**Important:** these are **speech-signal explanations**, not a diagnosis. "
            "Real-world meaning depends on context (device, environment, fatigue, stress, medication, etc.)."
        ),
    ]
    return "\n".join(lines)
 def explain_timeline() -> str:
     return (
         "### Timeline principle\n"
+        "- Use **multiple recordings of the same person**.\n"
         "- The key is **within-person change over time** relative to baseline.\n"
+        "- The Summary box explains **what changed** (signals) and gives cautious, non-clinical interpretations.\n"
     )
         return (
             [[1, "—", "Upload/select at least 2 recordings.", "", "", "", "", ""]],
             None,
+            explain_timeline(),
+            "### Upload/select at least 2 recordings to generate a summary."
         )
     rows = []
     pause_series, pitch_series, rms_series = [], [], []
+    # store first/last features for summary
+    feats_first = None
+    feats_last = None
     for idx, path in enumerate(paths, start=1):
         name = os.path.basename(path)
         y, sr = load_audio_file(path)
         feats, _ = compute_features(y, sr)
+        if idx == 1:
+            feats_first = feats
+        feats_last = feats
         pause_s = feats.pause_total_s if math.isfinite(feats.pause_total_s) else np.nan
         pitch_hz = feats.pitch_median_hz if math.isfinite(feats.pitch_median_hz) else np.nan
         rms_m = feats.rms_mean if math.isfinite(feats.rms_mean) else np.nan
     ax.legend(loc="best")
     fig.tight_layout()
+    summary = "### Summary not available."
+    if feats_first is not None and feats_last is not None:
+        summary = summary_of_changes(feats_first, feats_last)
+    return rows, fig, explain_timeline(), summary
 def analyze_many_uploaded(files):
 def refresh_bundled():
     bundled = list_bundled_audio()
     return gr.update(choices=bundled, value=[]), diagnostics_text()
   box-shadow: var(--shadow);
 }
 .card *{ color: #0b0f19 !important; }
+/* Tabs: make readable on dark background */
+div[role="tablist"]{
+  background: rgba(255,255,255,0.06) !important;
+  border: 1px solid rgba(255,255,255,0.14) !important;
+  border-radius: 14px !important;
+  padding: 6px !important;
+}
+button[role="tab"]{
+  color: rgba(255,255,255,0.92) !important;
+}
+button[role="tab"][aria-selected="true"]{
+  color: rgba(255,255,255,0.98) !important;
+  border-bottom: 2px solid rgba(255,255,255,0.65) !important;
+}
 """
 def build_ui():
                         with gr.Row():
                             refresh_btn = gr.Button("Refresh list", variant="secondary")
                             run_b = gr.Button("Analyze selected bundled", variant="secondary")
+                        gr.Markdown("Order matters: first = baseline, last = comparison.", elem_classes=["card"])
                     with gr.Column(scale=7):
                         timeline_df = gr.Dataframe(
                             headers=["#", "File", "Duration", "Pauses", "Pause(s)", "Pitch(Hz)", "RMS", "Active %"],
                             wrap=True,
                         )
                         timeline_plot = gr.Plot(label="Trend plot")
+                        timeline_expl = gr.Markdown(explain_timeline(), elem_classes=["card"])
+                        timeline_summary = gr.Markdown("### Summary will appear here after analysis.", elem_classes=["card"])
+                run_up.click(analyze_many_uploaded, inputs=[files], outputs=[timeline_df, timeline_plot, timeline_expl, timeline_summary])
+                run_b.click(analyze_many_bundled, inputs=[bundled_select], outputs=[timeline_df, timeline_plot, timeline_expl, timeline_summary])
             with gr.TabItem("Diagnostics"):
                 diag = gr.Markdown(diagnostics_text(), elem_classes=["card"])
                 diag_refresh = gr.Button("Refresh diagnostics", variant="secondary")
                 diag_refresh.click(lambda: diagnostics_text(), inputs=None, outputs=[diag])
+        # Refresh bundled choices AND diagnostics
         refresh_btn.click(refresh_bundled, inputs=None, outputs=[bundled_select, diag])
     return demo