Marcel0123 committed on
Commit
5c4e27d
·
verified ·
1 Parent(s): 482cc2e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -95
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import math
3
  import numpy as np
@@ -6,7 +7,8 @@ import librosa
6
  import matplotlib.pyplot as plt
7
 
8
  from dataclasses import dataclass
9
- from typing import Dict, Any, Tuple, Optional, List
 
10
 
11
  import torch
12
  from transformers import Wav2Vec2Model, Wav2Vec2FeatureExtractor
@@ -21,51 +23,43 @@ MODEL_ID = os.getenv("W2V_MODEL_ID", "facebook/wav2vec2-base-960h")
21
  # -----------------------------
22
  # Lightweight explainability helpers
23
  # -----------------------------
24
- def _safe_float(x, default=np.nan):
25
- try:
26
- if x is None:
27
- return default
28
- x = float(x)
29
- if math.isfinite(x):
30
- return x
31
- return default
32
- except Exception:
33
- return default
34
-
35
  def _human_seconds(sec: float) -> str:
36
  if not math.isfinite(sec):
37
  return "—"
38
  if sec < 60:
39
  return f"{sec:.1f}s"
40
  m = int(sec // 60)
41
- s = sec - 60*m
42
  return f"{m}m {s:.1f}s"
43
 
 
44
  def _cosine(a: np.ndarray, b: np.ndarray) -> float:
45
  a = np.asarray(a, dtype=np.float32)
46
  b = np.asarray(b, dtype=np.float32)
47
  denom = (np.linalg.norm(a) * np.linalg.norm(b)) + 1e-12
48
  return float(np.dot(a, b) / denom)
49
 
 
50
  # -----------------------------
51
  # Model (audio embedding)
52
  # -----------------------------
53
- @gr.cache()
54
  def load_w2v():
55
  extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_ID)
56
  model = Wav2Vec2Model.from_pretrained(MODEL_ID).to(DEVICE)
57
  model.eval()
58
  return extractor, model
59
 
 
60
  def embed_audio(y: np.ndarray, sr: int) -> np.ndarray:
61
  extractor, model = load_w2v()
62
  if sr != TARGET_SR:
63
  y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SR)
64
  sr = TARGET_SR
65
 
66
- # Normalize to [-1, 1]
67
  if y.size == 0:
68
  return np.zeros((768,), dtype=np.float32)
 
69
  y = y.astype(np.float32)
70
  mx = float(np.max(np.abs(y))) + 1e-9
71
  y = y / mx
@@ -74,10 +68,10 @@ def embed_audio(y: np.ndarray, sr: int) -> np.ndarray:
74
  with torch.no_grad():
75
  input_values = inputs["input_values"].to(DEVICE)
76
  out = model(input_values)
77
- # Mean pooling over time
78
  emb = out.last_hidden_state.mean(dim=1).squeeze(0).detach().cpu().numpy()
79
  return emb.astype(np.float32)
80
 
 
81
  # -----------------------------
82
  # Feature extraction
83
  # -----------------------------
@@ -94,6 +88,7 @@ class Features:
94
  pause_total_s: float
95
  active_ratio: float
96
 
 
97
  def compute_features(y: np.ndarray, sr: int) -> Tuple[Features, Dict[str, Any]]:
98
  """Return features + artifacts for plots/inspection."""
99
  if y is None or len(y) == 0:
@@ -105,22 +100,19 @@ def compute_features(y: np.ndarray, sr: int) -> Tuple[Features, Dict[str, Any]]:
105
  sr = TARGET_SR
106
 
107
  y = y.astype(np.float32)
108
- # Trim leading/trailing silence slightly for stability, but keep for pause detection
109
  duration = float(len(y) / sr)
110
 
111
- # Frame-level features
112
- hop = 160 # 10 ms at 16k
113
- frame = 400 # 25 ms at 16k
114
 
115
  rms = librosa.feature.rms(y=y, frame_length=frame, hop_length=hop)[0]
116
  zcr = librosa.feature.zero_crossing_rate(y, frame_length=frame, hop_length=hop)[0]
117
 
118
  rms_mean = float(np.mean(rms)) if rms.size else np.nan
119
- rms_std = float(np.std(rms)) if rms.size else np.nan
120
  zcr_mean = float(np.mean(zcr)) if zcr.size else np.nan
121
 
122
- # Pitch using probabilistic YIN (pyin). Can be slow, but OK for short clips.
123
- # f0 contains NaN for unvoiced frames.
124
  try:
125
  f0, voiced_flag, voiced_probs = librosa.pyin(
126
  y,
@@ -132,7 +124,6 @@ def compute_features(y: np.ndarray, sr: int) -> Tuple[Features, Dict[str, Any]]:
132
  )
133
  except Exception:
134
  f0 = None
135
- voiced_flag = None
136
 
137
  if f0 is None:
138
  pitch_median = np.nan
@@ -155,13 +146,11 @@ def compute_features(y: np.ndarray, sr: int) -> Tuple[Features, Dict[str, Any]]:
155
  pitch_iqr = np.nan
156
 
157
  # Pause detection using RMS threshold (relative)
158
- # Convert rms frames -> boolean "silent"
159
  if rms.size:
160
- thr = float(np.percentile(rms, 20)) * 0.8 # conservative
161
  silent = rms < thr
162
- # Count pauses longer than 0.2s
163
- min_pause_frames = int(0.2 / (hop / sr))
164
- # Run-length encoding
165
  pauses = []
166
  start = None
167
  for i, s in enumerate(silent):
@@ -185,6 +174,7 @@ def compute_features(y: np.ndarray, sr: int) -> Tuple[Features, Dict[str, Any]]:
185
  n_pauses = 0
186
  pause_total_s = 0.0
187
  active_ratio = np.nan
 
188
 
189
  feats = Features(
190
  duration_s=duration,
@@ -209,10 +199,11 @@ def compute_features(y: np.ndarray, sr: int) -> Tuple[Features, Dict[str, Any]]:
209
  "pitch": pitch,
210
  "times": times,
211
  "pauses": pauses,
212
- "rms_thr": thr if rms.size else None,
213
  }
214
  return feats, artifacts
215
 
 
216
  # -----------------------------
217
  # Plotting
218
  # -----------------------------
@@ -224,6 +215,7 @@ def plot_waveform_with_pauses(artifacts: Dict[str, Any]) -> plt.Figure:
224
 
225
  fig = plt.figure(figsize=(10, 3.2))
226
  ax = fig.add_subplot(111)
 
227
  if y.size:
228
  t = np.arange(len(y)) / sr
229
  ax.plot(t, y, linewidth=0.8)
@@ -232,7 +224,6 @@ def plot_waveform_with_pauses(artifacts: Dict[str, Any]) -> plt.Figure:
232
  ax.set_ylabel("Amplitude")
233
  ax.set_title("Waveform (met gedetecteerde pauzes)")
234
 
235
- # Overlay pause regions (convert pause frames to time)
236
  for (s, e) in pauses:
237
  ts = s * (hop / sr)
238
  te = e * (hop / sr)
@@ -244,12 +235,14 @@ def plot_waveform_with_pauses(artifacts: Dict[str, Any]) -> plt.Figure:
244
  fig.tight_layout()
245
  return fig
246
 
 
247
  def plot_pitch(artifacts: Dict[str, Any]) -> plt.Figure:
248
  pitch = artifacts.get("pitch", np.array([]))
249
  times = artifacts.get("times", np.array([]))
250
 
251
  fig = plt.figure(figsize=(10, 3.2))
252
  ax = fig.add_subplot(111)
 
253
  if pitch.size and times.size:
254
  ax.plot(times, pitch, linewidth=1.0)
255
  ax.set_xlabel("Tijd (s)")
@@ -262,43 +255,46 @@ def plot_pitch(artifacts: Dict[str, Any]) -> plt.Figure:
262
  fig.tight_layout()
263
  return fig
264
 
 
265
  # -----------------------------
266
  # UI helpers
267
  # -----------------------------
268
  def format_features_table(feats: Features) -> List[List[str]]:
269
- def fmt(x, kind="float"):
270
- if x is None or (isinstance(x, float) and (not math.isfinite(x))):
271
  return "—"
272
- if kind == "sec":
273
- return _human_seconds(float(x))
274
- if kind == "int":
275
- return str(int(x))
276
  return f"{float(x):.3f}"
277
 
 
 
 
 
 
278
  return [
279
- ["Duur", fmt(feats.duration_s, "sec")],
280
- ["Volume (RMS) gemiddeld", fmt(feats.rms_mean)],
281
- ["Volume (RMS) variatie", fmt(feats.rms_std)],
282
- ["ZCR (ruis/‘scherpte’) gemiddeld", fmt(feats.zcr_mean)],
283
- ["Pitch mediaan", ("—" if not math.isfinite(feats.pitch_median_hz) else f"{feats.pitch_median_hz:.1f} Hz")],
284
- ["Pitch spreiding (IQR)", ("—" if not math.isfinite(feats.pitch_iqr_hz) else f"{feats.pitch_iqr_hz:.1f} Hz")],
285
- ["Voiced ratio", ("—" if not math.isfinite(feats.voiced_ratio) else f"{feats.voiced_ratio*100:.1f}%")],
286
- ["Aantal pauzes (≥ 0.2s)", fmt(feats.n_pauses, "int")],
287
- ["Totale pauzeduur", fmt(feats.pause_total_s, "sec")],
288
- ["Actieve-spraak ratio", ("—" if not math.isfinite(feats.active_ratio) else f"{feats.active_ratio*100:.1f}%")],
289
  ]
290
 
 
291
  def explain_panel(feats: Features) -> str:
292
- # Human-friendly explanation without medical conclusions.
293
  bullets = []
294
  if math.isfinite(feats.pause_total_s):
295
- bullets.append(f"- **Pauzes**: {feats.n_pauses} pauzes (≥0.2s), samen { _human_seconds(feats.pause_total_s) }.")
296
  if math.isfinite(feats.pitch_median_hz):
297
  bullets.append(f"- **Pitch**: mediaan ~ {feats.pitch_median_hz:.1f} Hz, spreiding (IQR) {feats.pitch_iqr_hz:.1f} Hz.")
298
  if math.isfinite(feats.rms_mean):
299
  bullets.append(f"- **Volume**: RMS gemiddeld {feats.rms_mean:.3f} (relatief; alleen vergelijken binnen dezelfde setup).")
300
  if math.isfinite(feats.active_ratio):
301
  bullets.append(f"- **Actieve spraak**: ~ {feats.active_ratio*100:.1f}% van de tijd boven drempel.")
 
302
  if not bullets:
303
  bullets = ["- Geen features beschikbaar (audio te kort of leeg)."]
304
 
@@ -311,6 +307,7 @@ def explain_panel(feats: Features) -> str:
311
  "Gebruik dit als gespreksstarter of educatieve visualisatie."
312
  )
313
 
 
314
  # -----------------------------
315
  # Core callbacks
316
  # -----------------------------
@@ -325,6 +322,7 @@ def analyze_single(audio: Tuple[int, np.ndarray]):
325
  expl = explain_panel(feats)
326
  return gr.Dataframe(value=table, headers=["Kenmerk", "Waarde"]), wf, pc, expl
327
 
 
328
  def analyze_compare(a1, a2):
329
  if a1 is None or a2 is None:
330
  return "—", gr.Dataframe(value=[["—", "Selecteer twee fragmenten."]]), None
@@ -339,8 +337,7 @@ def analyze_compare(a1, a2):
339
  e2 = embed_audio(art2["y"], art2["sr"])
340
  sim = _cosine(e1, e2)
341
 
342
- # Delta table
343
- def d(a, b):
344
  if (a is None) or (b is None):
345
  return "—"
346
  if (isinstance(a, float) and not math.isfinite(a)) or (isinstance(b, float) and not math.isfinite(b)):
@@ -348,35 +345,29 @@ def analyze_compare(a1, a2):
348
  return f"{(b - a):+.3f}"
349
 
350
  rows = [
351
- ["Duur (s)", f1.duration_s if math.isfinite(f1.duration_s) else np.nan, f2.duration_s if math.isfinite(f2.duration_s) else np.nan, d(f1.duration_s, f2.duration_s)],
352
- ["RMS mean", f1.rms_mean, f2.rms_mean, d(f1.rms_mean, f2.rms_mean)],
353
- ["Pitch mediaan (Hz)", f1.pitch_median_hz, f2.pitch_median_hz, d(f1.pitch_median_hz, f2.pitch_median_hz)],
354
  ["Pauzes (#)", float(f1.n_pauses), float(f2.n_pauses), f"{(f2.n_pauses - f1.n_pauses):+d}"],
355
- ["Pauzeduur (s)", f1.pause_total_s, f2.pause_total_s, d(f1.pause_total_s, f2.pause_total_s)],
356
- ["Actieve ratio", f1.active_ratio, f2.active_ratio, d(f1.active_ratio, f2.active_ratio)],
357
  ]
358
 
359
- # Format values nicely
360
  formatted = []
361
  for k, v1, v2, dv in rows:
362
- def fmtv(v):
363
  if isinstance(v, float) and math.isfinite(v):
364
  if "ratio" in k.lower():
365
  return f"{v*100:.1f}%"
366
  if "pitch" in k.lower():
367
  return f"{v:.1f}"
368
- if "duur" in k.lower() or "s)" in k.lower() or "(s)" in k.lower() or "RMS" in k:
369
- return f"{v:.3f}"
370
  return f"{v:.3f}"
371
- if isinstance(v, (int, np.integer)):
372
- return str(int(v))
373
  return "—"
374
- formatted.append([k, fmtv(v1), fmtv(v2), dv])
375
 
376
- # Compare waveform overlay
377
  fig = plt.figure(figsize=(10, 3.2))
378
  ax = fig.add_subplot(111)
379
- # downsample for plotting speed
380
  def prep_plot(y, sr):
381
  if sr != TARGET_SR:
382
  y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SR)
@@ -388,18 +379,20 @@ def analyze_compare(a1, a2):
388
 
389
  t1, yy1 = prep_plot(y1, sr1)
390
  t2, yy2 = prep_plot(y2, sr2)
 
391
  if yy1.size:
392
  ax.plot(t1, yy1, linewidth=0.8, label="Fragment A")
393
  if yy2.size:
394
  ax.plot(t2, yy2, linewidth=0.8, label="Fragment B", alpha=0.8)
 
395
  ax.set_title("Waveform overlay (eerste max 20s)")
396
  ax.set_xlabel("Tijd (s)")
397
  ax.set_ylabel("Amplitude")
398
  ax.legend(loc="upper right")
399
  fig.tight_layout()
400
 
401
- sim_txt = f"{sim*100:.1f}%"
402
- return sim_txt, gr.Dataframe(value=formatted, headers=["Kenmerk", "A", "B", "Δ (B−A)"]), fig
403
 
404
  # -----------------------------
405
  # UI
@@ -408,11 +401,8 @@ CSS = """
408
  :root{
409
  --bg: #0b0f19;
410
  --panel: rgba(255,255,255,0.06);
411
- --panel2: rgba(255,255,255,0.09);
412
  --text: rgba(255,255,255,0.92);
413
  --muted: rgba(255,255,255,0.70);
414
- --accent: #7c3aed;
415
- --accent2: #22c55e;
416
  --border: rgba(255,255,255,0.14);
417
  --shadow: 0 10px 30px rgba(0,0,0,0.35);
418
  }
@@ -447,14 +437,6 @@ CSS = """
447
  line-height: 1.45;
448
  }
449
 
450
- .card{
451
- background: var(--panel);
452
- border: 1px solid var(--border);
453
- border-radius: 18px;
454
- padding: 14px;
455
- box-shadow: var(--shadow);
456
- }
457
-
458
  .badge{
459
  display: inline-flex;
460
  align-items: center;
@@ -523,19 +505,16 @@ def build_demo():
523
  """
524
  )
525
  with gr.Column(scale=7):
526
- with gr.Row():
527
- feat_df = gr.Dataframe(
528
- headers=["Kenmerk", "Waarde"],
529
- datatype=["str", "str"],
530
- interactive=False,
531
- wrap=True,
532
- label="Meetbare kenmerken"
533
- )
534
- with gr.Row():
535
- wf_plot = gr.Plot(label="Waveform + pauzes")
536
- with gr.Row():
537
- pitch_plot = gr.Plot(label="Pitch")
538
- explanation = gr.Markdown("### Upload of neem audio op", elem_classes=["card"])
539
 
540
  run_btn.click(analyze_single, inputs=[input_audio], outputs=[feat_df, wf_plot, pitch_plot, explanation])
541
 
@@ -558,7 +537,7 @@ def build_demo():
558
  datatype=["str", "str", "str", "str"],
559
  interactive=False,
560
  wrap=True,
561
- label="Verschillen (uitlegbaar)"
562
  )
563
  overlay_plot = gr.Plot(label="Waveform overlay")
564
 
@@ -572,15 +551,14 @@ def build_demo():
572
  - We tonen **verschillen** tussen fragmenten, i.p.v. één eindlabel.
573
  - We geven **geen diagnose** of medische claim; de output is bedoeld als **observatie**.
574
  - In een zorgcontext hoort interpretatie altijd samen te gaan met **context + gesprek + klinisch oordeel**.
575
-
576
- **Let op:** als je dit ooit richting praktijk wilt brengen, heb je o.a. nodig:
577
- governance, dataminimalisatie, DPIA/AVG, bias-audit, modelmonitoring, en duidelijke ‘human-in-the-loop’ afspraken.
578
  """
579
  )
580
 
581
  return demo
582
 
 
583
  if __name__ == "__main__":
584
  demo = build_demo()
585
  demo.queue(max_size=32)
586
  demo.launch()
 
 
1
+ ```python
2
  import os
3
  import math
4
  import numpy as np
 
7
  import matplotlib.pyplot as plt
8
 
9
  from dataclasses import dataclass
10
+ from typing import Dict, Any, Tuple, List
11
+ from functools import lru_cache
12
 
13
  import torch
14
  from transformers import Wav2Vec2Model, Wav2Vec2FeatureExtractor
 
23
  # -----------------------------
24
  # Lightweight explainability helpers
25
  # -----------------------------
 
 
 
 
 
 
 
 
 
 
 
26
  def _human_seconds(sec: float) -> str:
27
  if not math.isfinite(sec):
28
  return "—"
29
  if sec < 60:
30
  return f"{sec:.1f}s"
31
  m = int(sec // 60)
32
+ s = sec - 60 * m
33
  return f"{m}m {s:.1f}s"
34
 
35
+
36
  def _cosine(a: np.ndarray, b: np.ndarray) -> float:
37
  a = np.asarray(a, dtype=np.float32)
38
  b = np.asarray(b, dtype=np.float32)
39
  denom = (np.linalg.norm(a) * np.linalg.norm(b)) + 1e-12
40
  return float(np.dot(a, b) / denom)
41
 
42
+
43
  # -----------------------------
44
  # Model (audio embedding)
45
  # -----------------------------
46
@lru_cache(maxsize=1)
def load_w2v():
    """Load the Wav2Vec2 feature extractor and model, reusing them across calls.

    Both objects are created from ``MODEL_ID``. The model is moved to
    ``DEVICE`` and switched to evaluation mode. Because the function is
    wrapped in ``lru_cache(maxsize=1)``, later calls return the same
    ``(extractor, model)`` tuple instead of loading again.

    Returns:
        tuple: ``(feature_extractor, model)``.
    """
    feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_ID)
    encoder = Wav2Vec2Model.from_pretrained(MODEL_ID)
    encoder = encoder.to(DEVICE)
    encoder.eval()
    return feature_extractor, encoder
52
 
53
+
54
  def embed_audio(y: np.ndarray, sr: int) -> np.ndarray:
55
  extractor, model = load_w2v()
56
  if sr != TARGET_SR:
57
  y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SR)
58
  sr = TARGET_SR
59
 
 
60
  if y.size == 0:
61
  return np.zeros((768,), dtype=np.float32)
62
+
63
  y = y.astype(np.float32)
64
  mx = float(np.max(np.abs(y))) + 1e-9
65
  y = y / mx
 
68
  with torch.no_grad():
69
  input_values = inputs["input_values"].to(DEVICE)
70
  out = model(input_values)
 
71
  emb = out.last_hidden_state.mean(dim=1).squeeze(0).detach().cpu().numpy()
72
  return emb.astype(np.float32)
73
 
74
+
75
  # -----------------------------
76
  # Feature extraction
77
  # -----------------------------
 
88
  pause_total_s: float
89
  active_ratio: float
90
 
91
+
92
  def compute_features(y: np.ndarray, sr: int) -> Tuple[Features, Dict[str, Any]]:
93
  """Return features + artifacts for plots/inspection."""
94
  if y is None or len(y) == 0:
 
100
  sr = TARGET_SR
101
 
102
  y = y.astype(np.float32)
 
103
  duration = float(len(y) / sr)
104
 
105
+ hop = 160 # 10 ms at 16k
106
+ frame = 400 # 25 ms at 16k
 
107
 
108
  rms = librosa.feature.rms(y=y, frame_length=frame, hop_length=hop)[0]
109
  zcr = librosa.feature.zero_crossing_rate(y, frame_length=frame, hop_length=hop)[0]
110
 
111
  rms_mean = float(np.mean(rms)) if rms.size else np.nan
112
+ rms_std = float(np.std(rms)) if rms.size else np.nan
113
  zcr_mean = float(np.mean(zcr)) if zcr.size else np.nan
114
 
115
+ # Pitch using probabilistic YIN (pyin)
 
116
  try:
117
  f0, voiced_flag, voiced_probs = librosa.pyin(
118
  y,
 
124
  )
125
  except Exception:
126
  f0 = None
 
127
 
128
  if f0 is None:
129
  pitch_median = np.nan
 
146
  pitch_iqr = np.nan
147
 
148
  # Pause detection using RMS threshold (relative)
 
149
  if rms.size:
150
+ thr = float(np.percentile(rms, 20)) * 0.8
151
  silent = rms < thr
152
+
153
+ min_pause_frames = int(0.2 / (hop / sr)) # pauses >= 0.2s
 
154
  pauses = []
155
  start = None
156
  for i, s in enumerate(silent):
 
174
  n_pauses = 0
175
  pause_total_s = 0.0
176
  active_ratio = np.nan
177
+ thr = None
178
 
179
  feats = Features(
180
  duration_s=duration,
 
199
  "pitch": pitch,
200
  "times": times,
201
  "pauses": pauses,
202
+ "rms_thr": thr,
203
  }
204
  return feats, artifacts
205
 
206
+
207
  # -----------------------------
208
  # Plotting
209
  # -----------------------------
 
215
 
216
  fig = plt.figure(figsize=(10, 3.2))
217
  ax = fig.add_subplot(111)
218
+
219
  if y.size:
220
  t = np.arange(len(y)) / sr
221
  ax.plot(t, y, linewidth=0.8)
 
224
  ax.set_ylabel("Amplitude")
225
  ax.set_title("Waveform (met gedetecteerde pauzes)")
226
 
 
227
  for (s, e) in pauses:
228
  ts = s * (hop / sr)
229
  te = e * (hop / sr)
 
235
  fig.tight_layout()
236
  return fig
237
 
238
+
239
  def plot_pitch(artifacts: Dict[str, Any]) -> plt.Figure:
240
  pitch = artifacts.get("pitch", np.array([]))
241
  times = artifacts.get("times", np.array([]))
242
 
243
  fig = plt.figure(figsize=(10, 3.2))
244
  ax = fig.add_subplot(111)
245
+
246
  if pitch.size and times.size:
247
  ax.plot(times, pitch, linewidth=1.0)
248
  ax.set_xlabel("Tijd (s)")
 
255
  fig.tight_layout()
256
  return fig
257
 
258
+
259
  # -----------------------------
260
  # UI helpers
261
  # -----------------------------
262
def format_features_table(feats: Features) -> List[List[str]]:
    """Build the ``[label, value]`` rows shown in the single-clip feature table.

    Every value goes through the same "is there a usable number?" check, so
    ``None``, NaN and infinities (including NumPy scalar types) are rendered
    as an em dash in every row instead of raising or printing ``nan``.

    Args:
        feats: Feature record; the attributes read here are ``duration_s``,
            ``rms_mean``, ``rms_std``, ``zcr_mean``, ``pitch_median_hz``,
            ``pitch_iqr_hz``, ``voiced_ratio``, ``n_pauses``,
            ``pause_total_s`` and ``active_ratio``.

    Returns:
        A list of two-element rows: the display label and the formatted value.
    """
    missing = "—"

    def usable(x) -> bool:
        # True only for values that convert to a finite float.
        if x is None:
            return False
        try:
            return math.isfinite(float(x))
        except (TypeError, ValueError):
            return False

    def fmt_float(x):
        return f"{float(x):.3f}" if usable(x) else missing

    def fmt_int(x):
        return str(int(x)) if usable(x) else missing

    def fmt_hz(x):
        return f"{float(x):.1f} Hz" if usable(x) else missing

    def fmt_pct(x):
        return f"{float(x) * 100:.1f}%" if usable(x) else missing

    def fmt_sec(x):
        return _human_seconds(float(x)) if usable(x) else missing

    return [
        ["Duur", fmt_sec(feats.duration_s)],
        ["Volume (RMS) gemiddeld", fmt_float(feats.rms_mean)],
        ["Volume (RMS) variatie", fmt_float(feats.rms_std)],
        ["ZCR (ruis/‘scherpte’) gemiddeld", fmt_float(feats.zcr_mean)],
        ["Pitch mediaan", fmt_hz(feats.pitch_median_hz)],
        ["Pitch spreiding (IQR)", fmt_hz(feats.pitch_iqr_hz)],
        ["Voiced ratio", fmt_pct(feats.voiced_ratio)],
        ["Aantal pauzes (≥ 0.2s)", fmt_int(feats.n_pauses)],
        ["Totale pauzeduur", fmt_sec(feats.pause_total_s)],
        ["Actieve-spraak ratio", fmt_pct(feats.active_ratio)],
    ]
285
 
286
+
287
  def explain_panel(feats: Features) -> str:
 
288
  bullets = []
289
  if math.isfinite(feats.pause_total_s):
290
+ bullets.append(f"- **Pauzes**: {feats.n_pauses} pauzes (≥0.2s), samen {_human_seconds(feats.pause_total_s)}.")
291
  if math.isfinite(feats.pitch_median_hz):
292
  bullets.append(f"- **Pitch**: mediaan ~ {feats.pitch_median_hz:.1f} Hz, spreiding (IQR) {feats.pitch_iqr_hz:.1f} Hz.")
293
  if math.isfinite(feats.rms_mean):
294
  bullets.append(f"- **Volume**: RMS gemiddeld {feats.rms_mean:.3f} (relatief; alleen vergelijken binnen dezelfde setup).")
295
  if math.isfinite(feats.active_ratio):
296
  bullets.append(f"- **Actieve spraak**: ~ {feats.active_ratio*100:.1f}% van de tijd boven drempel.")
297
+
298
  if not bullets:
299
  bullets = ["- Geen features beschikbaar (audio te kort of leeg)."]
300
 
 
307
  "Gebruik dit als gespreksstarter of educatieve visualisatie."
308
  )
309
 
310
+
311
  # -----------------------------
312
  # Core callbacks
313
  # -----------------------------
 
322
  expl = explain_panel(feats)
323
  return gr.Dataframe(value=table, headers=["Kenmerk", "Waarde"]), wf, pc, expl
324
 
325
+
326
  def analyze_compare(a1, a2):
327
  if a1 is None or a2 is None:
328
  return "—", gr.Dataframe(value=[["—", "Selecteer twee fragmenten."]]), None
 
337
  e2 = embed_audio(art2["y"], art2["sr"])
338
  sim = _cosine(e1, e2)
339
 
340
+ def delta(a, b):
 
341
  if (a is None) or (b is None):
342
  return "—"
343
  if (isinstance(a, float) and not math.isfinite(a)) or (isinstance(b, float) and not math.isfinite(b)):
 
345
  return f"{(b - a):+.3f}"
346
 
347
  rows = [
348
+ ["Duur (s)", f1.duration_s, f2.duration_s, delta(f1.duration_s, f2.duration_s)],
349
+ ["RMS mean", f1.rms_mean, f2.rms_mean, delta(f1.rms_mean, f2.rms_mean)],
350
+ ["Pitch mediaan (Hz)", f1.pitch_median_hz, f2.pitch_median_hz, delta(f1.pitch_median_hz, f2.pitch_median_hz)],
351
  ["Pauzes (#)", float(f1.n_pauses), float(f2.n_pauses), f"{(f2.n_pauses - f1.n_pauses):+d}"],
352
+ ["Pauzeduur (s)", f1.pause_total_s, f2.pause_total_s, delta(f1.pause_total_s, f2.pause_total_s)],
353
+ ["Actieve ratio", f1.active_ratio, f2.active_ratio, delta(f1.active_ratio, f2.active_ratio)],
354
  ]
355
 
 
356
  formatted = []
357
  for k, v1, v2, dv in rows:
358
+ def fmt(v):
359
  if isinstance(v, float) and math.isfinite(v):
360
  if "ratio" in k.lower():
361
  return f"{v*100:.1f}%"
362
  if "pitch" in k.lower():
363
  return f"{v:.1f}"
 
 
364
  return f"{v:.3f}"
 
 
365
  return "—"
366
+ formatted.append([k, fmt(v1), fmt(v2), dv])
367
 
 
368
  fig = plt.figure(figsize=(10, 3.2))
369
  ax = fig.add_subplot(111)
370
+
371
  def prep_plot(y, sr):
372
  if sr != TARGET_SR:
373
  y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SR)
 
379
 
380
  t1, yy1 = prep_plot(y1, sr1)
381
  t2, yy2 = prep_plot(y2, sr2)
382
+
383
  if yy1.size:
384
  ax.plot(t1, yy1, linewidth=0.8, label="Fragment A")
385
  if yy2.size:
386
  ax.plot(t2, yy2, linewidth=0.8, label="Fragment B", alpha=0.8)
387
+
388
  ax.set_title("Waveform overlay (eerste max 20s)")
389
  ax.set_xlabel("Tijd (s)")
390
  ax.set_ylabel("Amplitude")
391
  ax.legend(loc="upper right")
392
  fig.tight_layout()
393
 
394
+ return f"{sim*100:.1f}%", gr.Dataframe(value=formatted, headers=["Kenmerk", "A", "B", "Δ (B−A)"]), fig
395
+
396
 
397
  # -----------------------------
398
  # UI
 
401
  :root{
402
  --bg: #0b0f19;
403
  --panel: rgba(255,255,255,0.06);
 
404
  --text: rgba(255,255,255,0.92);
405
  --muted: rgba(255,255,255,0.70);
 
 
406
  --border: rgba(255,255,255,0.14);
407
  --shadow: 0 10px 30px rgba(0,0,0,0.35);
408
  }
 
437
  line-height: 1.45;
438
  }
439
 
 
 
 
 
 
 
 
 
440
  .badge{
441
  display: inline-flex;
442
  align-items: center;
 
505
  """
506
  )
507
  with gr.Column(scale=7):
508
+ feat_df = gr.Dataframe(
509
+ headers=["Kenmerk", "Waarde"],
510
+ datatype=["str", "str"],
511
+ interactive=False,
512
+ wrap=True,
513
+ label="Meetbare kenmerken",
514
+ )
515
+ wf_plot = gr.Plot(label="Waveform + pauzes")
516
+ pitch_plot = gr.Plot(label="Pitch")
517
+ explanation = gr.Markdown("### Upload of neem audio op", elem_id="explain-card")
 
 
 
518
 
519
  run_btn.click(analyze_single, inputs=[input_audio], outputs=[feat_df, wf_plot, pitch_plot, explanation])
520
 
 
537
  datatype=["str", "str", "str", "str"],
538
  interactive=False,
539
  wrap=True,
540
+ label="Verschillen (uitlegbaar)",
541
  )
542
  overlay_plot = gr.Plot(label="Waveform overlay")
543
 
 
551
  - We tonen **verschillen** tussen fragmenten, i.p.v. één eindlabel.
552
  - We geven **geen diagnose** of medische claim; de output is bedoeld als **observatie**.
553
  - In een zorgcontext hoort interpretatie altijd samen te gaan met **context + gesprek + klinisch oordeel**.
 
 
 
554
  """
555
  )
556
 
557
  return demo
558
 
559
+
560
  if __name__ == "__main__":
561
  demo = build_demo()
562
  demo.queue(max_size=32)
563
  demo.launch()
564
+ ```