Marcel0123 committed on
Commit
5216a05
·
verified ·
1 Parent(s): 7416f7f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +348 -144
app.py CHANGED
@@ -7,23 +7,17 @@ import matplotlib.pyplot as plt
7
 
8
  from dataclasses import dataclass
9
  from typing import Dict, Any, Tuple, List
10
- from functools import lru_cache
11
 
12
- import torch
13
- from transformers import Wav2Vec2Model, Wav2Vec2FeatureExtractor
14
-
15
- # =========================================================
16
- # Configuration
17
- # =========================================================
18
  TARGET_SR = 16000
19
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
20
- MODEL_ID = os.getenv("W2V_MODEL_ID", "facebook/wav2vec2-base-960h")
21
 
22
- # =========================================================
23
- # Utility helpers
24
- # =========================================================
25
  def human_seconds(sec: float) -> str:
26
- if not math.isfinite(sec):
27
  return "—"
28
  if sec < 60:
29
  return f"{sec:.1f}s"
@@ -31,70 +25,73 @@ def human_seconds(sec: float) -> str:
31
  return f"{m}m {sec - 60*m:.1f}s"
32
 
33
 
34
- def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
35
- denom = (np.linalg.norm(a) * np.linalg.norm(b)) + 1e-9
36
- return float(np.dot(a, b) / denom)
37
-
38
-
39
- # =========================================================
40
- # Model loading (cached)
41
- # =========================================================
42
- @lru_cache(maxsize=1)
43
- def load_wav2vec():
44
- extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_ID)
45
- model = Wav2Vec2Model.from_pretrained(MODEL_ID).to(DEVICE)
46
- model.eval()
47
- return extractor, model
48
-
49
-
50
- def embed_audio(y: np.ndarray, sr: int) -> np.ndarray:
51
- if sr != TARGET_SR:
52
- y = librosa.resample(y, sr, TARGET_SR)
53
-
54
- if y.size == 0:
55
- return np.zeros(768, dtype=np.float32)
56
-
57
- y = y.astype(np.float32)
58
- y /= np.max(np.abs(y)) + 1e-9
59
-
60
- extractor, model = load_wav2vec()
61
- inputs = extractor(y, sampling_rate=TARGET_SR, return_tensors="pt")
62
-
63
- with torch.no_grad():
64
- out = model(inputs["input_values"].to(DEVICE))
65
- emb = out.last_hidden_state.mean(dim=1).squeeze(0).cpu().numpy()
66
-
67
- return emb.astype(np.float32)
68
 
69
 
70
- # =========================================================
71
- # Feature extraction
72
- # =========================================================
73
  @dataclass
74
  class Features:
75
  duration_s: float
76
  rms_mean: float
77
  rms_std: float
78
- pitch_median: float
79
- pitch_iqr: float
 
 
80
  n_pauses: int
81
  pause_total_s: float
82
  active_ratio: float
83
 
84
 
85
  def compute_features(y: np.ndarray, sr: int) -> Tuple[Features, Dict[str, Any]]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  if sr != TARGET_SR:
87
- y = librosa.resample(y, sr, TARGET_SR)
88
  sr = TARGET_SR
 
 
89
 
90
- duration = len(y) / sr
91
- hop = 160
92
- frame = 400
 
 
 
 
 
93
 
94
  rms = librosa.feature.rms(y=y, frame_length=frame, hop_length=hop)[0]
95
- rms_mean = float(np.mean(rms))
96
- rms_std = float(np.std(rms))
 
 
 
97
 
 
98
  try:
99
  f0, _, _ = librosa.pyin(
100
  y,
@@ -107,127 +104,334 @@ def compute_features(y: np.ndarray, sr: int) -> Tuple[Features, Dict[str, Any]]:
107
  except Exception:
108
  f0 = None
109
 
110
- if f0 is not None and np.any(np.isfinite(f0)):
111
- voiced = f0[np.isfinite(f0)]
112
- pitch_median = float(np.median(voiced))
113
- pitch_iqr = float(np.percentile(voiced, 75) - np.percentile(voiced, 25))
 
 
114
  else:
115
- pitch_median = np.nan
116
- pitch_iqr = np.nan
117
-
118
- silence = rms < np.percentile(rms, 20)
119
- min_pause_frames = int(0.2 / (hop / sr))
120
-
121
- pauses = []
122
- start = None
123
- for i, s in enumerate(silence):
124
- if s and start is None:
125
- start = i
126
- if not s and start is not None:
127
- if i - start >= min_pause_frames:
128
- pauses.append((start, i))
129
- start = None
130
-
131
- pause_total = sum((e - s) * hop / sr for s, e in pauses)
132
- active_ratio = 1.0 - float(np.mean(silence))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
  feats = Features(
135
  duration_s=duration,
136
  rms_mean=rms_mean,
137
  rms_std=rms_std,
138
- pitch_median=pitch_median,
139
- pitch_iqr=pitch_iqr,
140
- n_pauses=len(pauses),
141
- pause_total_s=pause_total,
 
 
142
  active_ratio=active_ratio,
143
  )
144
 
145
  artifacts = {
146
  "y": y,
147
  "sr": sr,
 
 
148
  "rms": rms,
149
- "pitch": f0,
 
 
150
  "pauses": pauses,
151
- "hop": hop,
152
  }
153
-
154
  return feats, artifacts
155
 
156
 
157
- # =========================================================
158
  # Plotting
159
- # =========================================================
160
- def plot_waveform(artifacts: Dict[str, Any]):
161
- y = artifacts["y"]
162
- sr = artifacts["sr"]
163
- pauses = artifacts["pauses"]
164
- hop = artifacts["hop"]
165
-
166
- fig = plt.figure(figsize=(10, 3))
167
  ax = fig.add_subplot(111)
168
 
169
- t = np.arange(len(y)) / sr
170
- ax.plot(t, y, lw=0.8)
171
-
172
- for s, e in pauses:
173
- ax.axvspan(s * hop / sr, e * hop / sr, alpha=0.2)
 
 
 
 
 
 
 
 
174
 
175
- ax.set_title("Waveform met pauzes")
176
- ax.set_xlabel("Tijd (s)")
177
- ax.set_ylabel("Amplitude")
178
  fig.tight_layout()
179
  return fig
180
 
181
 
182
- # =========================================================
183
- # UI callbacks
184
- # =========================================================
185
- def analyze_single(audio):
186
- if audio is None:
187
- return [], None, "Upload of neem audio op."
188
 
189
- sr, y = audio
190
- feats, art = compute_features(y, sr)
 
 
 
 
 
 
 
 
 
191
 
192
- table = [
 
 
 
 
 
 
 
 
 
 
 
193
  ["Duur", human_seconds(feats.duration_s)],
194
- ["Gemiddeld volume (RMS)", f"{feats.rms_mean:.3f}"],
195
- ["Volume-variatie", f"{feats.rms_std:.3f}"],
196
- ["Pitch mediaan", "—" if not math.isfinite(feats.pitch_median) else f"{feats.pitch_median:.1f} Hz"],
197
- ["Pitch spreiding (IQR)", "—" if not math.isfinite(feats.pitch_iqr) else f"{feats.pitch_iqr:.1f} Hz"],
198
- ["Aantal pauzes ≥0.2s", str(feats.n_pauses)],
 
 
199
  ["Totale pauzeduur", human_seconds(feats.pause_total_s)],
200
- ["Actieve spraakratio", f"{feats.active_ratio*100:.1f}%"],
201
  ]
202
 
203
- fig = plot_waveform(art)
204
- explanation = (
205
- "### Wat laat dit zien?\n"
206
- "- Dit zijn **meetbare spraaksignalen** (pauzes, pitch, volume).\n"
207
- "- Er wordt **geen diagnose** gesteld.\n"
208
- "- Interpretatie hoort altijd samen met context en gesprek."
209
- )
210
-
211
- return table, fig, explanation
212
-
213
 
214
- # =========================================================
215
- # UI
216
- # =========================================================
217
- with gr.Blocks(title="Explainable Speech Analytics") as demo:
218
- gr.Markdown(
219
- "## Explainable Speech Analytics\n"
220
- "*Educatieve demo geen medisch hulpmiddel*"
 
 
 
 
 
 
 
 
 
221
  )
222
 
223
- with gr.Row():
224
- audio = gr.Audio(sources=["upload", "microphone"], type="numpy", label="Audiofragment")
225
- run = gr.Button("Analyseer", variant="primary")
226
 
227
- table = gr.Dataframe(headers=["Kenmerk", "Waarde"], interactive=False)
228
- plot = gr.Plot()
229
- explanation = gr.Markdown()
 
 
 
 
 
 
 
 
230
 
231
- run.click(analyze_single, inputs=audio, outputs=[table, plot, explanation])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
 
233
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  from dataclasses import dataclass
9
  from typing import Dict, Any, Tuple, List
 
10
 
11
+ # -----------------------------
12
+ # Config
13
+ # -----------------------------
 
 
 
14
  TARGET_SR = 16000
 
 
15
 
16
+ # -----------------------------
17
+ # Helpers
18
+ # -----------------------------
19
  def human_seconds(sec: float) -> str:
20
+ if sec is None or not math.isfinite(sec):
21
  return "—"
22
  if sec < 60:
23
  return f"{sec:.1f}s"
 
25
  return f"{m}m {sec - 60*m:.1f}s"
26
 
27
 
28
+ def safe_pct(x: float) -> str:
29
+ if x is None or not math.isfinite(x):
30
+ return "—"
31
+ return f"{x*100:.1f}%"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
 
34
+ # -----------------------------
35
+ # Features
36
+ # -----------------------------
37
@dataclass
class Features:
    """Explainable acoustic summary of one audio clip (measurable signals only)."""
    duration_s: float        # total clip length in seconds
    rms_mean: float          # mean per-frame RMS energy (relative units; NaN for empty input)
    rms_std: float           # RMS variability across frames
    zcr_mean: float          # mean zero-crossing rate (rough noisiness/"sharpness" proxy)
    pitch_median_hz: float   # median voiced F0 in Hz; NaN when pitch estimation failed
    pitch_iqr_hz: float      # F0 interquartile range in Hz; NaN when unavailable
    voiced_ratio: float      # fraction of frames judged voiced; NaN when unavailable
    n_pauses: int            # number of detected pauses >= 0.2 s
    pause_total_s: float     # summed duration of detected pauses, seconds
    active_ratio: float      # fraction of frames above the silence threshold
49
 
50
 
51
  def compute_features(y: np.ndarray, sr: int) -> Tuple[Features, Dict[str, Any]]:
52
+ """
53
+ Explainable acoustic features + artifacts for plotting.
54
+ (No medical claims; only measurable signals.)
55
+ """
56
+ if y is None or len(y) == 0:
57
+ f = Features(
58
+ duration_s=float("nan"),
59
+ rms_mean=float("nan"),
60
+ rms_std=float("nan"),
61
+ zcr_mean=float("nan"),
62
+ pitch_median_hz=float("nan"),
63
+ pitch_iqr_hz=float("nan"),
64
+ voiced_ratio=float("nan"),
65
+ n_pauses=0,
66
+ pause_total_s=0.0,
67
+ active_ratio=float("nan"),
68
+ )
69
+ return f, {"y": np.array([]), "sr": sr}
70
+
71
+ # Resample to stable SR
72
  if sr != TARGET_SR:
73
+ y = librosa.resample(y.astype(np.float32), orig_sr=sr, target_sr=TARGET_SR)
74
  sr = TARGET_SR
75
+ else:
76
+ y = y.astype(np.float32)
77
 
78
+ # Normalize [-1, 1] for stable plots
79
+ mx = float(np.max(np.abs(y))) + 1e-9
80
+ y = y / mx
81
+
82
+ duration = float(len(y) / sr)
83
+
84
+ hop = 160 # 10ms @ 16k
85
+ frame = 400 # 25ms @ 16k
86
 
87
  rms = librosa.feature.rms(y=y, frame_length=frame, hop_length=hop)[0]
88
+ zcr = librosa.feature.zero_crossing_rate(y, frame_length=frame, hop_length=hop)[0]
89
+
90
+ rms_mean = float(np.mean(rms)) if rms.size else float("nan")
91
+ rms_std = float(np.std(rms)) if rms.size else float("nan")
92
+ zcr_mean = float(np.mean(zcr)) if zcr.size else float("nan")
93
 
94
+ # Pitch via pyin (can fail on noise/short clips)
95
  try:
96
  f0, _, _ = librosa.pyin(
97
  y,
 
104
  except Exception:
105
  f0 = None
106
 
107
+ if f0 is None:
108
+ pitch = np.array([])
109
+ times = np.array([])
110
+ pitch_median = float("nan")
111
+ pitch_iqr = float("nan")
112
+ voiced_ratio = float("nan")
113
  else:
114
+ pitch = np.asarray(f0, dtype=np.float32)
115
+ times = librosa.frames_to_time(np.arange(len(pitch)), sr=sr, hop_length=hop)
116
+ voiced = np.isfinite(pitch)
117
+ voiced_ratio = float(np.mean(voiced)) if voiced.size else float("nan")
118
+ if np.any(voiced):
119
+ pv = pitch[voiced]
120
+ pitch_median = float(np.median(pv))
121
+ q75, q25 = np.percentile(pv, [75, 25])
122
+ pitch_iqr = float(q75 - q25)
123
+ else:
124
+ pitch_median = float("nan")
125
+ pitch_iqr = float("nan")
126
+
127
+ # Pause detection: low-RMS frames as silence
128
+ if rms.size:
129
+ thr = float(np.percentile(rms, 20)) * 0.8
130
+ silent = rms < thr
131
+
132
+ # pauses >= 0.2s
133
+ min_pause_frames = int(0.2 / (hop / sr))
134
+
135
+ pauses = []
136
+ start = None
137
+ for i, s in enumerate(silent):
138
+ if s and start is None:
139
+ start = i
140
+ if (not s) and start is not None:
141
+ end = i
142
+ if (end - start) >= min_pause_frames:
143
+ pauses.append((start, end))
144
+ start = None
145
+ if start is not None:
146
+ end = len(silent)
147
+ if (end - start) >= min_pause_frames:
148
+ pauses.append((start, end))
149
+
150
+ n_pauses = int(len(pauses))
151
+ pause_total_s = float(sum((e - s) * (hop / sr) for s, e in pauses))
152
+ active_ratio = float(1.0 - np.mean(silent))
153
+ else:
154
+ thr = None
155
+ pauses = []
156
+ n_pauses = 0
157
+ pause_total_s = 0.0
158
+ active_ratio = float("nan")
159
 
160
  feats = Features(
161
  duration_s=duration,
162
  rms_mean=rms_mean,
163
  rms_std=rms_std,
164
+ zcr_mean=zcr_mean,
165
+ pitch_median_hz=pitch_median,
166
+ pitch_iqr_hz=pitch_iqr,
167
+ voiced_ratio=voiced_ratio,
168
+ n_pauses=n_pauses,
169
+ pause_total_s=pause_total_s,
170
  active_ratio=active_ratio,
171
  )
172
 
173
  artifacts = {
174
  "y": y,
175
  "sr": sr,
176
+ "hop": hop,
177
+ "frame": frame,
178
  "rms": rms,
179
+ "zcr": zcr,
180
+ "times": times,
181
+ "pitch": pitch,
182
  "pauses": pauses,
183
+ "rms_thr": thr,
184
  }
 
185
  return feats, artifacts
186
 
187
 
188
+ # -----------------------------
189
  # Plotting
190
+ # -----------------------------
191
def plot_waveform_with_pauses(art: Dict[str, Any]) -> plt.Figure:
    """Draw the normalized waveform and shade each detected pause interval.

    Expects the artifacts dict produced by compute_features (keys: y, sr,
    hop, pauses). Returns a matplotlib Figure; shows a placeholder when the
    clip is empty.
    """
    samples = art["y"]
    rate = art["sr"]
    hop_len = art["hop"]
    pause_spans = art.get("pauses", [])

    fig = plt.figure(figsize=(10, 3.2))
    ax = fig.add_subplot(111)

    if samples.size:
        # Pause indices are RMS-frame indices; one frame step = hop/sr seconds.
        frame_dt = hop_len / rate
        time_axis = np.arange(len(samples)) / rate
        ax.plot(time_axis, samples, linewidth=0.8)
        for start_f, end_f in pause_spans:
            ax.axvspan(start_f * frame_dt, end_f * frame_dt, alpha=0.2)
        ax.set_title("Waveform (met gedetecteerde pauzes)")
        ax.set_xlabel("Tijd (s)")
        ax.set_ylabel("Amplitude")
    else:
        ax.text(0.5, 0.5, "Geen audio", ha="center", va="center")
        ax.set_axis_off()

    fig.tight_layout()
    return fig
216
 
217
 
218
def plot_pitch(art: Dict[str, Any]) -> plt.Figure:
    """Plot the estimated pitch contour over time; placeholder if no pitch data.

    Reads the 'pitch' (F0 per frame, NaN = unvoiced) and 'times' arrays from
    the artifacts dict; both default to empty arrays when missing.
    """
    f0_track = art.get("pitch", np.array([]))
    f0_times = art.get("times", np.array([]))

    fig = plt.figure(figsize=(10, 3.2))
    ax = fig.add_subplot(111)

    has_pitch = bool(f0_track.size) and bool(f0_times.size)
    if has_pitch:
        ax.plot(f0_times, f0_track, linewidth=1.0)
        ax.set_title("Pitch contour (NaN = onvoiced)")
        ax.set_xlabel("Tijd (s)")
        ax.set_ylabel("Pitch (Hz)")
    else:
        # pyin can fail on very short or noisy clips; make that visible.
        ax.text(0.5, 0.5, "Pitch niet beschikbaar (te kort/ruis)", ha="center", va="center")
        ax.set_axis_off()

    fig.tight_layout()
    return fig
236
+
237
+
238
+ # -----------------------------
239
+ # UI formatting
240
+ # -----------------------------
241
def features_table(feats: Features) -> List[List[str]]:
    """Build [label, formatted value] rows for the UI dataframe."""

    def f3(x):
        # Three decimals; em dash for None/NaN/inf.
        if x is None or not math.isfinite(x):
            return "—"
        return f"{float(x):.3f}"

    pitch_med = "—" if not math.isfinite(feats.pitch_median_hz) else f"{feats.pitch_median_hz:.1f} Hz"
    pitch_iqr = "—" if not math.isfinite(feats.pitch_iqr_hz) else f"{feats.pitch_iqr_hz:.1f} Hz"

    rows = [
        ["Duur", human_seconds(feats.duration_s)],
        ["Volume (RMS) gemiddeld", f3(feats.rms_mean)],
        ["Volume (RMS) variatie", f3(feats.rms_std)],
        ["ZCR (ruis/‘scherpte’) gemiddeld", f3(feats.zcr_mean)],
        ["Pitch mediaan", pitch_med],
        ["Pitch spreiding (IQR)", pitch_iqr],
        ["Voiced ratio", safe_pct(feats.voiced_ratio)],
        ["Aantal pauzes (≥ 0.2s)", str(int(feats.n_pauses))],
        ["Totale pauzeduur", human_seconds(feats.pause_total_s)],
        ["Actieve-spraak ratio", safe_pct(feats.active_ratio)],
    ]
    return rows
 
 
 
 
 
 
 
 
 
 
 
258
 
259
def explain_text(feats: Features) -> str:
    """Compose the Markdown explanation shown beneath the plots.

    Pitch and volume bullets are included only when those features are finite.
    """
    lines = [
        f"- **Pauzes**: {feats.n_pauses} pauzes (≥0.2s), totaal {human_seconds(feats.pause_total_s)}."
    ]
    if math.isfinite(feats.pitch_median_hz):
        lines.append(f"- **Pitch**: mediaan ~ {feats.pitch_median_hz:.1f} Hz, spreiding {feats.pitch_iqr_hz:.1f} Hz (IQR).")
    if math.isfinite(feats.rms_mean):
        lines.append(f"- **Volume**: RMS gemiddeld {feats.rms_mean:.3f} (relatief; vooral binnen dezelfde setup vergelijken).")
    lines.append(f"- **Actieve spraak**: {safe_pct(feats.active_ratio)} van de tijd boven drempel.")

    header = (
        "### Wat ‘ziet’ de AI hier?\n"
        "Dit is een **uitleg-demo**: we tonen *meetbare spraaksignalen* (niet ‘waarom’ ze veranderen).\n\n"
    )
    footer = (
        "\n\n"
        "**Belangrijk:** dit is **geen diagnose** en **geen medisch hulpmiddel**. "
        "Gebruik dit als **educatieve visualisatie** of gespreksstarter."
    )
    return header + "\n".join(lines) + footer
276
 
 
 
 
277
 
278
+ # -----------------------------
279
+ # Callback
280
+ # -----------------------------
281
def analyze_one(audio: Tuple[int, np.ndarray]):
    """Gradio callback: turn one (sample_rate, samples) clip into table, plots, text.

    Returns (Dataframe, waveform Figure, pitch Figure, Markdown string);
    the figures are None and a hint is shown when no audio was provided.
    """
    if audio is None:
        placeholder = gr.Dataframe(
            value=[["—", "Upload of neem audio op om te starten."]],
            headers=["Kenmerk", "Waarde"],
        )
        return placeholder, None, None, "### Upload of neem audio op"

    sr, y = audio
    feats, art = compute_features(y, sr)
    df = gr.Dataframe(value=features_table(feats), headers=["Kenmerk", "Waarde"])
    return df, plot_waveform_with_pauses(art), plot_pitch(art), explain_text(feats)
298
+
299
+
300
+ # -----------------------------
301
+ # Polished UI
302
+ # -----------------------------
303
+ CSS = """
304
+ :root{
305
+ --bg: #0b0f19;
306
+ --panel: rgba(255,255,255,0.06);
307
+ --text: rgba(255,255,255,0.92);
308
+ --muted: rgba(255,255,255,0.72);
309
+ --border: rgba(255,255,255,0.14);
310
+ --shadow: 0 12px 30px rgba(0,0,0,0.35);
311
+ }
312
+
313
+ .gradio-container{
314
+ background:
315
+ radial-gradient(1200px 700px at 10% 10%, rgba(124,58,237,0.25), transparent 55%),
316
+ radial-gradient(900px 600px at 90% 20%, rgba(34,197,94,0.18), transparent 55%),
317
+ radial-gradient(1100px 800px at 40% 100%, rgba(59,130,246,0.15), transparent 60%),
318
+ var(--bg) !important;
319
+ color: var(--text) !important;
320
+ }
321
+
322
+ #header{
323
+ background: linear-gradient(135deg, rgba(124,58,237,0.22), rgba(34,197,94,0.14));
324
+ border: 1px solid var(--border);
325
+ border-radius: 18px;
326
+ padding: 18px 18px 14px 18px;
327
+ box-shadow: var(--shadow);
328
+ }
329
+
330
+ #title{
331
+ font-size: 28px;
332
+ font-weight: 780;
333
+ letter-spacing: -0.02em;
334
+ margin: 0;
335
+ }
336
+
337
+ #subtitle{
338
+ margin-top: 8px;
339
+ color: var(--muted);
340
+ font-size: 14px;
341
+ line-height: 1.45;
342
+ }
343
+
344
+ .badge{
345
+ display: inline-flex;
346
+ align-items: center;
347
+ gap: 8px;
348
+ padding: 6px 10px;
349
+ border-radius: 999px;
350
+ border: 1px solid var(--border);
351
+ background: rgba(255,255,255,0.05);
352
+ color: var(--muted);
353
+ font-size: 12px;
354
+ margin-right: 10px;
355
+ margin-bottom: 8px;
356
+ }
357
+ .badge b{ color: var(--text); font-weight: 720; }
358
+
359
+ .card{
360
+ background: var(--panel);
361
+ border: 1px solid var(--border);
362
+ border-radius: 18px;
363
+ padding: 14px;
364
+ box-shadow: var(--shadow);
365
+ }
366
+ """
367
+
368
+ def build_ui():
369
+ with gr.Blocks(
370
+ css=CSS,
371
+ theme=gr.themes.Soft(primary_hue="violet", secondary_hue="emerald"),
372
+ title="Explainable Speech Analytics (Demo)",
373
+ ) as demo:
374
+
375
+ gr.HTML(
376
+ """
377
+ <div id="header">
378
+ <p id="title">Explainable Speech Analytics</p>
379
+ <div id="subtitle">
380
+ <span class="badge"><b>Doel</b> inzicht in spraaksignalen</span>
381
+ <span class="badge"><b>Geen diagnose</b> geen medisch hulpmiddel</span>
382
+ <span class="badge"><b>Anti–black box</b> we tonen signalen, niet alleen scores</span>
383
+ <p style="margin-top:10px">
384
+ Upload of neem een kort fragment op. Je ziet daarna <b>pauzes</b>, <b>pitch</b> en <b>volume-energie</b>
385
+ in grafieken en tabellen — bedoeld als uitleg en dialoog, niet als oordeel.
386
+ </p>
387
+ </div>
388
+ </div>
389
+ """
390
+ )
391
 
392
+ with gr.Row():
393
+ with gr.Column(scale=5):
394
+ audio = gr.Audio(label="Audio", sources=["upload", "microphone"], type="numpy")
395
+ run = gr.Button("Analyseer", variant="primary")
396
+ with gr.Accordion("Wat gebeurt er technisch?", open=False):
397
+ gr.Markdown(
398
+ """
399
+ - We extraheren **akoestische kenmerken** (RMS, ZCR), schatten **pitch** met *pyin*,
400
+ en detecteren **pauzes** via een adaptieve energiedrempel.
401
+ - We tonen de gemeten signalen als grafieken zodat het **uitlegbaar** blijft.
402
+ """
403
+ )
404
+
405
+ with gr.Column(scale=7):
406
+ feats_df = gr.Dataframe(
407
+ headers=["Kenmerk", "Waarde"],
408
+ datatype=["str", "str"],
409
+ interactive=False,
410
+ wrap=True,
411
+ label="Meetbare kenmerken",
412
+ )
413
+ wf_plot = gr.Plot(label="Waveform + pauzes")
414
+ pitch_plot = gr.Plot(label="Pitch")
415
+ explanation = gr.Markdown("### Upload of neem audio op", elem_classes=["card"])
416
+
417
+ run.click(analyze_one, inputs=[audio], outputs=[feats_df, wf_plot, pitch_plot, explanation])
418
+
419
+ with gr.Accordion("Ethiek & transparantie", open=False):
420
+ gr.Markdown(
421
+ """
422
+ - Deze demo geeft **geen diagnose** en maakt **geen klinische claim**.
423
+ - Output is bedoeld als **observatie** (meetbare signalen) om gesprekken te ondersteunen.
424
+ - In zorgcontext: interpretatie hoort altijd samen met **context + gesprek + klinisch oordeel**.
425
+ """
426
+ )
427
+
428
+ return demo
429
+
430
+
431
if __name__ == "__main__":
    demo = build_ui()
    demo.queue(max_size=32)

    # Prefer the platform-assigned port (HF Spaces sets PORT / GRADIO_SERVER_PORT).
    env = os.environ
    port = int(env.get("PORT", env.get("GRADIO_SERVER_PORT", "7860")))
    demo.launch(server_name="0.0.0.0", server_port=port)