Marcel0123 committed on
Commit
1e9171f
·
verified ·
1 Parent(s): 597caf5

Upload 2 files

Browse files
Files changed (1) hide show
  1. app.py +82 -174
app.py CHANGED
@@ -10,9 +10,6 @@ import plotly.graph_objects as go
10
  import plotly.express as px
11
  import time
12
 
13
- # ======================
14
- # NL labels
15
- # ======================
16
  FEATURE_LABELS = {
17
  "age": "Leeftijd",
18
  "sex": "Geslacht",
@@ -50,28 +47,22 @@ In de praktijk gebruiken artsen en onderzoekers zo'n plot om patronen en verband
50
  Met AI kunnen we patronen vinden die je met het blote oog nooit zou zien. Dat maakt dit niet alleen een mooie visualisatie, maar ook een knap stukje technologie met échte waarde voor onderzoek en zorg.
51
 
52
  **Speel zelf de onderzoeker!**
53
- Doe alsof je een arts bent en kies links bovenin een waarde, bijvoorbeeld **cholesterol**, **leeftijd** of **geslacht**. Klik daarna op *Update visualisaties* en ontdek je eigen patronen in de data.
54
  """
55
 
56
- # ======================
57
- # Data helpers
58
- # ======================
59
  def load_diabetes_df():
60
  d = datasets.load_diabetes()
61
- X = pd.DataFrame(d.data, columns=d.feature_names) # standardized features
62
  y = pd.Series(d.target, name="target")
63
- df = X.copy()
64
- df["target"] = y
65
  return df
66
 
67
  def compute_overview_table(df: pd.DataFrame):
68
- """Gemiddelde + % boven/onder gemiddelde voor kernmetingen (gestandaardiseerd)."""
69
  keys = ["bmi","bp","s1","s2","s3","s4","s5","s6"]
70
  rows = []
71
  for k in keys:
72
  vals = df[k].dropna().values
73
- if vals.size == 0:
74
- continue
75
  mean = float(vals.mean())
76
  pct_above = float((vals > 0).mean() * 100.0) # 0 ≈ globaal gemiddelde
77
  pct_below = float((vals < 0).mean() * 100.0)
@@ -82,35 +73,11 @@ def compute_overview_table(df: pd.DataFrame):
82
  "% onder gemiddelde": round(pct_below, 1),
83
  })
84
  table = pd.DataFrame(rows)
85
- note = (
86
- "Let op: waarden in deze dataset zijn **gestandaardiseerd**. "
87
- "`0` betekent ongeveer het **algemene gemiddelde**. "
88
- "Positief = hoger dan gemiddeld, negatief = lager dan gemiddeld."
89
- )
90
  return table, note
91
 
92
- def compute_top_correlations(df: pd.DataFrame, top_n: int = 6):
93
- feats = [c for c in df.columns if c != "target"]
94
- corr = pd.DataFrame(df[feats]).corr()
95
- pairs = []
96
- for i, a in enumerate(feats):
97
- for j, b in enumerate(feats):
98
- if j <= i:
99
- continue
100
- pairs.append({
101
- "Combinatie": f"{FEATURE_LABELS.get(a,a)} ↔ {FEATURE_LABELS.get(b,b)}",
102
- "Correlatie": corr.loc[a, b]
103
- })
104
- out = pd.DataFrame(pairs)
105
- out["Sterkte (|r|)"] = out["Correlatie"].abs()
106
- out = out.sort_values("Sterkte (|r|)", ascending=False).head(top_n).reset_index(drop=True)
107
- out["Correlatie"] = out["Correlatie"].round(3)
108
- out["Sterkte (|r|)"] = out["Sterkte (|r|)"].round(3)
109
- return out[["Combinatie", "Correlatie", "Sterkte (|r|)"]]
110
-
111
- # ======================
112
- # PCA helpers
113
- # ======================
114
  def compute_pca(df: pd.DataFrame, n_components: int, standardize: bool):
115
  feats = [c for c in df.columns if c != "target"]
116
  X = df[feats].values
@@ -125,131 +92,87 @@ def compute_pca(df: pd.DataFrame, n_components: int, standardize: bool):
125
  expl = pca.explained_variance_ratio_
126
  return feats, Xs, Z, loadings, expl
127
 
128
- # ======================
129
- # Plot builders
130
- # ======================
131
  def build_biplot_plotly(df, Z, loadings, feats, color_key, arrow_scale=2.0):
132
- # Hovertext met relevante velden
133
- hover_text = []
134
  fields = ["bmi","bp","s1","s2","s3","s4","s5","s6","age","sex","target"]
135
- for idx in range(len(df)):
136
- parts = [f"{FEATURE_LABELS.get(k,k)}: {df.iloc[idx][k]:.3f}" for k in fields]
137
- hover_text.append("<br>".join(parts))
138
-
139
  fig = go.Figure()
140
  fig.add_trace(go.Scatter(
141
- x=Z[:, 0], y=Z[:, 1],
142
- mode="markers",
143
  marker=dict(size=8, color=df[color_key].values),
144
- text=hover_text,
145
- hovertemplate="%{text}<extra></extra>"
146
  ))
147
-
148
- # Loading arrows (PC1/PC2)
149
  for i, key in enumerate(feats):
150
- x = loadings[i, 0] * arrow_scale
151
- y = loadings[i, 1] * arrow_scale
152
- fig.add_annotation(x=x, y=y, ax=0, ay=0,
153
- xref="x", yref="y", axref="x", ayref="y",
154
  showarrow=True, arrowhead=3)
155
- fig.add_annotation(x=x*1.05, y=y*1.05, text=FEATURE_LABELS.get(key, key),
156
  showarrow=False, font=dict(size=10))
157
-
158
- fig.update_layout(
159
- title="PCA-biplot (2D, hover voor details)",
160
- xaxis_title="PC1",
161
- yaxis_title="PC2",
162
- height=520,
163
- margin=dict(l=0, r=0, t=40, b=0)
164
- )
165
  return fig
166
 
167
  def build_biplot_matplotlib(df, Z, loadings, feats, color_key, arrow_scale=2.0, point_size=32, alpha=0.85):
168
- # Matplotlib variant voor PNG-export
169
- fig = plt.figure(figsize=(7.8, 5.6))
170
  ax = fig.add_subplot(111)
171
- sc = ax.scatter(Z[:, 0], Z[:, 1], c=df[color_key].values, s=point_size, alpha=alpha)
172
- cbar = plt.colorbar(sc, ax=ax, pad=0.02)
173
- cbar.set_label(f"Kleur: {FEATURE_LABELS.get(color_key, color_key)}")
174
- ax.set_xlabel("PC1"); ax.set_ylabel("PC2")
175
- ax.set_title("PCA-biplot — punten + pijlen")
176
- for i, key in enumerate(feats):
177
- x = loadings[i, 0] * arrow_scale
178
- y = loadings[i, 1] * arrow_scale
179
- ax.arrow(0, 0, x, y, head_width=0.05, head_length=0.08, fc="k", ec="k", length_includes_head=True)
180
- ax.text(x*1.08, y*1.08, FEATURE_LABELS.get(key, key), fontsize=9, ha="center", va="center")
181
- ax.axhline(0, color="grey", linewidth=0.6, linestyle=":")
182
- ax.axvline(0, color="grey", linewidth=0.6, linestyle=":")
183
- ax.grid(True, linestyle=":", linewidth=0.6)
184
- plt.tight_layout()
185
  return fig
186
 
187
  def build_pca3d(Z3, color_vals):
188
- fig = go.Figure(data=[go.Scatter3d(
189
- x=Z3[:, 0], y=Z3[:, 1], z=Z3[:, 2],
190
- mode="markers",
191
- marker=dict(size=4, color=color_vals, opacity=0.85)
192
- )])
193
- fig.update_layout(
194
- title="PCA 3D — PC1 · PC2 · PC3 (sleep om te draaien)",
195
- scene=dict(xaxis_title="PC1", yaxis_title="PC2", zaxis_title="PC3"),
196
- margin=dict(l=0, r=0, t=40, b=0),
197
- height=520
198
- )
199
  return fig
200
 
201
  def build_variance_plot(expl):
202
- fig = plt.figure(figsize=(7.8, 3.8))
203
  ax = fig.add_subplot(111)
204
- xs = np.arange(1, len(expl) + 1)
205
  ax.bar(xs, expl, width=0.8, align="center")
206
  ax.plot(xs, np.cumsum(expl), marker="o")
207
- ax.set_xticks(xs)
208
- ax.set_xlabel("Principal Component")
209
- ax.set_ylabel("Explained variance ratio")
210
  ax.set_title("Uitlegvariantie per component (balken) + cumulatief (lijn)")
211
- ax.grid(True, linestyle=":", linewidth=0.6)
212
- plt.tight_layout()
213
- return fig
214
-
215
- def build_corr_heatmap(df: pd.DataFrame):
216
- feats = [c for c in df.columns if c != "target"]
217
- corr = pd.DataFrame(df[feats]).corr()
218
- order = corr.abs().sum().sort_values(ascending=False).index.tolist()
219
- corr_sorted = corr.loc[order, order]
220
- fig = px.imshow(corr_sorted, text_auto=False, aspect="auto",
221
- color_continuous_scale="RdBu", origin="lower", zmin=-1, zmax=1)
222
- fig.update_layout(title="Correlatie-heatmap (gesorteerd op sterkte)",
223
- height=520, margin=dict(l=0, r=0, t=40, b=0))
224
  return fig
225
 
226
  def build_hist_box(df: pd.DataFrame, color_key: str):
227
  series = df[color_key].dropna()
228
- fig_hist = px.histogram(series, nbins=30, title=f"Histogram — {FEATURE_LABELS.get(color_key,color_key)}")
229
- fig_box = px.box(series, points="outliers", title=f"Boxplot — {FEATURE_LABELS.get(color_key,color_key)}")
 
 
230
  return fig_hist, fig_box
231
 
232
- # ======================
233
- # Controllers
234
- # ======================
235
  def controller(color_label="BMI (Body Mass Index)", n_components=10, standardize=True, arrow_scale=2.0):
236
  df = load_diabetes_df()
237
  feats, Xs, Z, loadings, expl = compute_pca(df, n_components, standardize)
238
  color_key = LABEL_TO_KEY.get(color_label, "bmi")
239
  color_vals = df[color_key].values
240
- color_label_nl = FEATURE_LABELS.get(color_key, color_key)
241
 
242
- # Plots
243
  fig_biplot = build_biplot_plotly(df, Z, loadings, feats, color_key, arrow_scale=arrow_scale)
244
- # 3D (zorg voor minstens 3 componenten)
245
  if Z.shape[1] < 3:
246
- pca3 = PCA(n_components=3)
247
- Z3 = pca3.fit_transform(Xs)
248
  else:
249
  Z3 = Z[:, :3]
250
  fig3d = build_pca3d(Z3, color_vals)
251
  fig_variance = build_variance_plot(expl)
252
- fig_heatmap = build_corr_heatmap(df)
253
  fig_hist, fig_box = build_hist_box(df, color_key)
254
 
255
  # Tabel top-features
@@ -272,13 +195,13 @@ def controller(color_label="BMI (Body Mass Index)", n_components=10, standardize
272
 
273
  summary_md = f"""
274
  ### Wat zie je hier?
 
275
  - **Hover** over punten voor exacte waarden (BMI, bloeddruk, cholesterol, glucose, leeftijd, geslacht, etc.).
276
  - **2D-biplot** met pijlen (belangrijkste metingen) en **3D-view** voor extra diepte.
277
  - **Uitlegvariantieplot**: laat zien hoeveel variatie elke component uitlegt.
278
- - **Correlatie-heatmap**: toont welke metingen samen bewegen (gesorteerd op sterkte).
279
- - **Histogram + boxplot**: verdeling en spreiding van de gekozen meting ({color_label_nl}).
280
  """
281
- return fig_biplot, fig3d, fig_variance, table, overview_df, overview_note, summary_md, fig_heatmap, fig_hist, fig_box
282
 
283
  def animate_pca(color_label="BMI (Body Mass Index)", point_size=32, alpha=0.85, n_components=10, standardize=True, frames=40, pause=0.0):
284
  df = load_diabetes_df()
@@ -287,19 +210,16 @@ def animate_pca(color_label="BMI (Body Mass Index)", point_size=32, alpha=0.85,
287
  color_vals = df[color_key].values
288
  for i in range(frames):
289
  t = i / max(1, frames-1)
290
- w1 = min(1.0, t * 2.0)
291
- w2 = max(0.0, (t - 0.5) * 2.0)
292
  coords = np.column_stack([Z[:, 0] * w1, Z[:, 1] * w2])
293
- fig = plt.figure(figsize=(7.8, 5.6))
294
  ax = fig.add_subplot(111)
295
  ax.scatter(coords[:, 0], coords[:, 1], c=color_vals, s=point_size, alpha=alpha)
296
  ax.set_xlabel("PC1 (opbouw)"); ax.set_ylabel("PC2 (opbouw)")
297
  title = "PCA-projectie (animatie) — " + ("PC1 →" if w2 == 0 else "PC1 + PC2")
298
  ax.set_title(f"{title} — frame {i+1}/{frames}")
299
- ax.axhline(0, color="grey", linewidth=0.6, linestyle=":")
300
- ax.axvline(0, color="grey", linewidth=0.6, linestyle=":")
301
- ax.grid(True, linestyle=":", linewidth=0.6)
302
- plt.tight_layout()
303
  yield fig
304
  if pause > 0:
305
  time.sleep(pause)
@@ -310,8 +230,7 @@ def export_biplot_png(color_label="BMI (Body Mass Index)", arrow_scale=2.0, poin
310
  color_key = LABEL_TO_KEY.get(color_label, "bmi")
311
  fig = build_biplot_matplotlib(df, Z, loadings, feats, color_key, arrow_scale=arrow_scale, point_size=point_size, alpha=alpha)
312
  path = f"/mnt/data/biplot_{int(time.time())}.png"
313
- fig.savefig(path, dpi=150, bbox_inches="tight")
314
- plt.close(fig)
315
  return path
316
 
317
  def export_variance_png(n_components=10, standardize=True):
@@ -319,24 +238,21 @@ def export_variance_png(n_components=10, standardize=True):
319
  feats, Xs, Z, loadings, expl = compute_pca(df, n_components, standardize)
320
  fig = build_variance_plot(expl)
321
  path = f"/mnt/data/variance_{int(time.time())}.png"
322
- fig.savefig(path, dpi=150, bbox_inches="tight")
323
- plt.close(fig)
324
  return path
325
 
326
- # ======================
327
- # UI
328
- # ======================
329
- with gr.Blocks(title="PCA Dashboard — Diabetes (compleet)") as demo:
330
  gr.HTML("""
331
  <style>
332
- .card {background:#fff; border:1px solid #e5e7eb; border-radius:12px; padding:14px; box-shadow: 0 1px 4px rgba(0,0,0,0.06);}
333
  .callout {padding:12px 14px; border-left:4px solid #2563eb; background:#f1f5f9; border-radius:8px; margin: 8px 0 18px;}
334
- .smallnote {font-size: 0.92em; opacity: 0.85;}
335
  </style>
336
  """)
337
 
338
- gr.Markdown("# PCA Dashboard — Diabetes (compleet)")
339
  gr.Markdown(MEDICAL_MD)
 
340
 
341
  with gr.Row():
342
  with gr.Column(scale=1):
@@ -347,12 +263,13 @@ with gr.Blocks(title="PCA Dashboard — Diabetes (compleet)") as demo:
347
  n_components = gr.Slider(3, 10, value=10, step=1, label="Aantal PCA-componenten")
348
  standardize = gr.Checkbox(value=True, label="Standaardiseer metingen (aanbevolen)")
349
  arrow_scale = gr.Slider(0.5, 5.0, value=2.0, step=0.1, label="Pijl-schaal (2D-biplot)")
350
- run_btn = gr.Button("Update visualisaties")
351
- gr.HTML('<div class="callout smallnote">💡 <b>Tip:</b> Kies links een meting (bijv. BMI of cholesterol) en klik daarna op <b>Update visualisaties</b>.</div>')
352
  with gr.Group():
353
  gr.Markdown("### Animatie")
354
  animate_btn = gr.Button("▶ Animate PCA (PC1 → PC2)")
355
- anim_plot = gr.Plot(label="Animatie van projectie")
 
356
  with gr.Group():
357
  gr.Markdown("### Downloads")
358
  dl_biplot = gr.DownloadButton("Download biplot (PNG)")
@@ -362,51 +279,42 @@ with gr.Blocks(title="PCA Dashboard — Diabetes (compleet)") as demo:
362
  with gr.Row():
363
  with gr.Column():
364
  gr.Markdown("### Biplot (2D, hover)")
365
- plot_biplot = gr.Plot()
366
  with gr.Column():
367
  gr.Markdown("### 3D PCA (PC1–PC3)")
368
- plot3d = gr.Plot()
369
  with gr.Row():
370
  with gr.Column():
371
  gr.Markdown("### Uitlegvariantie")
372
- plot_expl = gr.Plot()
373
  with gr.Column():
374
- gr.Markdown("### Correlatie-heatmap")
375
- plot_heat = gr.Plot()
376
  with gr.Row():
377
  with gr.Column():
378
  gr.Markdown("### Histogram")
379
- plot_hist = gr.Plot()
380
  with gr.Column():
381
  gr.Markdown("### Boxplot")
382
- plot_box = gr.Plot()
383
  with gr.Row():
384
- with gr.Column():
385
- gr.Markdown("### Top-features (PC1 / PC2)")
386
- table = gr.Dataframe(headers=["Feature (PC1)", "Loading PC1", "Feature (PC2)", "Loading PC2"], row_count=6)
387
  with gr.Column():
388
  gr.Markdown("### Overzicht (gemiddelden & verdeling)")
389
  overview_tbl = gr.Dataframe(interactive=False)
390
- summary = gr.Markdown()
391
- overview_note_md = gr.Markdown()
 
392
 
393
  inputs = [color_feat, n_components, standardize, arrow_scale]
394
  run_btn.click(fn=controller, inputs=inputs,
395
- outputs=[plot_biplot, plot3d, plot_expl, table, overview_tbl, overview_note_md, summary, plot_heat, plot_hist, plot_box])
396
  demo.load(fn=controller, inputs=inputs,
397
- outputs=[plot_biplot, plot3d, plot_expl, table, overview_tbl, overview_note_md, summary, plot_heat, plot_hist, plot_box])
398
-
399
- animate_btn.click(fn=animate_pca,
400
- inputs=[color_feat],
401
- outputs=anim_plot)
402
-
403
- # Downloads (PNG)
404
- dl_biplot.click(fn=export_biplot_png,
405
- inputs=[color_feat, arrow_scale],
406
- outputs=[dl_biplot])
407
- dl_var.click(fn=export_variance_png,
408
- inputs=[],
409
- outputs=[dl_var])
410
 
411
  if __name__ == "__main__":
412
  demo.queue().launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False, show_api=False)
 
10
  import plotly.express as px
11
  import time
12
 
 
 
 
13
  FEATURE_LABELS = {
14
  "age": "Leeftijd",
15
  "sex": "Geslacht",
 
47
  Met AI kunnen we patronen vinden die je met het blote oog nooit zou zien. Dat maakt dit niet alleen een mooie visualisatie, maar ook een knap stukje technologie met échte waarde voor onderzoek en zorg.
48
 
49
  **Speel zelf de onderzoeker!**
50
+ Doe alsof je een arts bent en kies links bovenin een waarde, bijvoorbeeld **cholesterol**, **leeftijd** of **geslacht**. Klik daarna op **Update visualisaties** en ontdek je eigen patronen in de data.
51
  """
52
 
53
+ # -------------------- Data helpers --------------------
 
 
54
  def load_diabetes_df():
55
  d = datasets.load_diabetes()
56
+ X = pd.DataFrame(d.data, columns=d.feature_names) # gestandaardiseerd
57
  y = pd.Series(d.target, name="target")
58
+ df = X.copy(); df["target"] = y
 
59
  return df
60
 
61
  def compute_overview_table(df: pd.DataFrame):
 
62
  keys = ["bmi","bp","s1","s2","s3","s4","s5","s6"]
63
  rows = []
64
  for k in keys:
65
  vals = df[k].dropna().values
 
 
66
  mean = float(vals.mean())
67
  pct_above = float((vals > 0).mean() * 100.0) # 0 ≈ globaal gemiddelde
68
  pct_below = float((vals < 0).mean() * 100.0)
 
73
  "% onder gemiddelde": round(pct_below, 1),
74
  })
75
  table = pd.DataFrame(rows)
76
+ note = ("Let op: waarden in deze dataset zijn **gestandaardiseerd**. `0` ≈ algemeen gemiddelde. "
77
+ "Positief = hoger dan gemiddeld, negatief = lager dan gemiddeld.")
 
 
 
78
  return table, note
79
 
80
+ # -------------------- PCA helpers --------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  def compute_pca(df: pd.DataFrame, n_components: int, standardize: bool):
82
  feats = [c for c in df.columns if c != "target"]
83
  X = df[feats].values
 
92
  expl = pca.explained_variance_ratio_
93
  return feats, Xs, Z, loadings, expl
94
 
95
+ # -------------------- Plot builders --------------------
 
 
96
  def build_biplot_plotly(df, Z, loadings, feats, color_key, arrow_scale=2.0):
97
+ # Hover info
 
98
  fields = ["bmi","bp","s1","s2","s3","s4","s5","s6","age","sex","target"]
99
+ hover_text = [
100
+ "<br>".join(f"{FEATURE_LABELS.get(k,k)}: {df.iloc[i][k]:.3f}" for k in fields)
101
+ for i in range(len(df))
102
+ ]
103
  fig = go.Figure()
104
  fig.add_trace(go.Scatter(
105
+ x=Z[:,0], y=Z[:,1], mode="markers",
 
106
  marker=dict(size=8, color=df[color_key].values),
107
+ text=hover_text, hovertemplate="%{text}<extra></extra>"
 
108
  ))
109
+ # loading pijlen
 
110
  for i, key in enumerate(feats):
111
+ x = loadings[i,0]*arrow_scale; y = loadings[i,1]*arrow_scale
112
+ fig.add_annotation(x=x, y=y, ax=0, ay=0, xref="x", yref="y", axref="x", ayref="y",
 
 
113
  showarrow=True, arrowhead=3)
114
+ fig.add_annotation(x=x*1.05, y=y*1.05, text=FEATURE_LABELS.get(key,key),
115
  showarrow=False, font=dict(size=10))
116
+ fig.update_layout(title="PCA-biplot (2D, hover)", xaxis_title="PC1", yaxis_title="PC2",
117
+ height=520, margin=dict(l=10, r=10, t=40, b=10))
 
 
 
 
 
 
118
  return fig
119
 
120
  def build_biplot_matplotlib(df, Z, loadings, feats, color_key, arrow_scale=2.0, point_size=32, alpha=0.85):
121
+ fig = plt.figure(figsize=(7.6, 5.2))
 
122
  ax = fig.add_subplot(111)
123
+ sc = ax.scatter(Z[:,0], Z[:,1], c=df[color_key].values, s=point_size, alpha=alpha)
124
+ cbar = plt.colorbar(sc, ax=ax, pad=0.02); cbar.set_label(f"Kleur: {FEATURE_LABELS.get(color_key,color_key)}")
125
+ ax.set_xlabel("PC1"); ax.set_ylabel("PC2"); ax.set_title("PCA-biplot — PNG-export")
126
+ for i,key in enumerate(feats):
127
+ x=loadings[i,0]*arrow_scale; y=loadings[i,1]*arrow_scale
128
+ ax.arrow(0,0,x,y, head_width=0.05, head_length=0.08, fc="k", ec="k", length_includes_head=True)
129
+ ax.text(x*1.08, y*1.08, FEATURE_LABELS.get(key,key), fontsize=9, ha="center", va="center")
130
+ ax.axhline(0,color="grey",linewidth=0.6,linestyle=":"); ax.axvline(0,color="grey",linewidth=0.6,linestyle=":")
131
+ ax.grid(True,linestyle=":",linewidth=0.6); fig.tight_layout()
 
 
 
 
 
132
  return fig
133
 
134
  def build_pca3d(Z3, color_vals):
135
+ fig = go.Figure(data=[go.Scatter3d(x=Z3[:,0], y=Z3[:,1], z=Z3[:,2], mode="markers",
136
+ marker=dict(size=4, color=color_vals, opacity=0.85))])
137
+ fig.update_layout(title="PCA 3D — PC1·PC2·PC3 (sleep om te draaien)",
138
+ scene=dict(xaxis_title="PC1", yaxis_title="PC2", zaxis_title="PC3"),
139
+ height=520, margin=dict(l=10, r=10, t=40, b=10))
 
 
 
 
 
 
140
  return fig
141
 
142
  def build_variance_plot(expl):
143
+ fig = plt.figure(figsize=(7.6, 3.6))
144
  ax = fig.add_subplot(111)
145
+ xs = np.arange(1, len(expl)+1)
146
  ax.bar(xs, expl, width=0.8, align="center")
147
  ax.plot(xs, np.cumsum(expl), marker="o")
148
+ ax.set_xticks(xs); ax.set_xlabel("Principal Component"); ax.set_ylabel("Explained variance ratio")
 
 
149
  ax.set_title("Uitlegvariantie per component (balken) + cumulatief (lijn)")
150
+ ax.grid(True, linestyle=":", linewidth=0.6); fig.tight_layout()
 
 
 
 
 
 
 
 
 
 
 
 
151
  return fig
152
 
153
  def build_hist_box(df: pd.DataFrame, color_key: str):
154
  series = df[color_key].dropna()
155
+ fig_hist = px.histogram(series, nbins=30, title=f"Histogram — {FEATURE_LABELS.get(color_key,color_key)}", height=360)
156
+ fig_hist.update_layout(margin=dict(l=10, r=10, t=40, b=10))
157
+ fig_box = px.box(series, points="outliers", title=f"Boxplot — {FEATURE_LABELS.get(color_key,color_key)}", height=360)
158
+ fig_box.update_layout(margin=dict(l=10, r=10, t=40, b=10))
159
  return fig_hist, fig_box
160
 
161
+ # -------------------- Controllers --------------------
 
 
162
  def controller(color_label="BMI (Body Mass Index)", n_components=10, standardize=True, arrow_scale=2.0):
163
  df = load_diabetes_df()
164
  feats, Xs, Z, loadings, expl = compute_pca(df, n_components, standardize)
165
  color_key = LABEL_TO_KEY.get(color_label, "bmi")
166
  color_vals = df[color_key].values
 
167
 
 
168
  fig_biplot = build_biplot_plotly(df, Z, loadings, feats, color_key, arrow_scale=arrow_scale)
169
+ # 3D (minstens 3 componenten)
170
  if Z.shape[1] < 3:
171
+ pca3 = PCA(n_components=3); Z3 = pca3.fit_transform(Xs)
 
172
  else:
173
  Z3 = Z[:, :3]
174
  fig3d = build_pca3d(Z3, color_vals)
175
  fig_variance = build_variance_plot(expl)
 
176
  fig_hist, fig_box = build_hist_box(df, color_key)
177
 
178
  # Tabel top-features
 
195
 
196
  summary_md = f"""
197
  ### Wat zie je hier?
198
+ - **Klik op _Update visualisaties_** om alles te verversen met jouw keuze.
199
  - **Hover** over punten voor exacte waarden (BMI, bloeddruk, cholesterol, glucose, leeftijd, geslacht, etc.).
200
  - **2D-biplot** met pijlen (belangrijkste metingen) en **3D-view** voor extra diepte.
201
  - **Uitlegvariantieplot**: laat zien hoeveel variatie elke component uitlegt.
202
+ - **Histogram + boxplot**: verdeling en spreiding van de gekozen meting ({FEATURE_LABELS.get(color_key,color_key)}).
 
203
  """
204
+ return fig_biplot, fig3d, fig_variance, table, overview_df, overview_note, summary_md
205
 
206
  def animate_pca(color_label="BMI (Body Mass Index)", point_size=32, alpha=0.85, n_components=10, standardize=True, frames=40, pause=0.0):
207
  df = load_diabetes_df()
 
210
  color_vals = df[color_key].values
211
  for i in range(frames):
212
  t = i / max(1, frames-1)
213
+ w1 = min(1.0, t * 2.0); w2 = max(0.0, (t - 0.5) * 2.0)
 
214
  coords = np.column_stack([Z[:, 0] * w1, Z[:, 1] * w2])
215
+ fig = plt.figure(figsize=(7.6, 5.2))
216
  ax = fig.add_subplot(111)
217
  ax.scatter(coords[:, 0], coords[:, 1], c=color_vals, s=point_size, alpha=alpha)
218
  ax.set_xlabel("PC1 (opbouw)"); ax.set_ylabel("PC2 (opbouw)")
219
  title = "PCA-projectie (animatie) — " + ("PC1 →" if w2 == 0 else "PC1 + PC2")
220
  ax.set_title(f"{title} — frame {i+1}/{frames}")
221
+ ax.axhline(0, color="grey", linewidth=0.6, linestyle=":"); ax.axvline(0, color="grey", linewidth=0.6, linestyle=":")
222
+ ax.grid(True, linestyle=":", linewidth=0.6); fig.tight_layout()
 
 
223
  yield fig
224
  if pause > 0:
225
  time.sleep(pause)
 
230
  color_key = LABEL_TO_KEY.get(color_label, "bmi")
231
  fig = build_biplot_matplotlib(df, Z, loadings, feats, color_key, arrow_scale=arrow_scale, point_size=point_size, alpha=alpha)
232
  path = f"/mnt/data/biplot_{int(time.time())}.png"
233
+ fig.savefig(path, dpi=150, bbox_inches="tight"); plt.close(fig)
 
234
  return path
235
 
236
  def export_variance_png(n_components=10, standardize=True):
 
238
  feats, Xs, Z, loadings, expl = compute_pca(df, n_components, standardize)
239
  fig = build_variance_plot(expl)
240
  path = f"/mnt/data/variance_{int(time.time())}.png"
241
+ fig.savefig(path, dpi=150, bbox_inches="tight"); plt.close(fig)
 
242
  return path
243
 
244
+ # -------------------- UI --------------------
245
+ with gr.Blocks(title="PCA Dashboard — Diabetes (netjes & compleet)") as demo:
 
 
246
  gr.HTML("""
247
  <style>
 
248
  .callout {padding:12px 14px; border-left:4px solid #2563eb; background:#f1f5f9; border-radius:8px; margin: 8px 0 18px;}
249
+ .cta {padding:10px 12px; border:1px dashed #2563eb; background:#eff6ff; border-radius:8px; margin-top:6px;}
250
  </style>
251
  """)
252
 
253
+ gr.Markdown("# PCA Dashboard — Diabetes (netjes & compleet)")
254
  gr.Markdown(MEDICAL_MD)
255
+ gr.HTML('<div class="callout"><b>Belangrijk:</b> kies links je instellingen en klik daarna op <b>Update visualisaties</b>. Wil je de stap-voor-stap projectie zien? Klik op <b>▶ Animate PCA</b>.</div>')
256
 
257
  with gr.Row():
258
  with gr.Column(scale=1):
 
263
  n_components = gr.Slider(3, 10, value=10, step=1, label="Aantal PCA-componenten")
264
  standardize = gr.Checkbox(value=True, label="Standaardiseer metingen (aanbevolen)")
265
  arrow_scale = gr.Slider(0.5, 5.0, value=2.0, step=0.1, label="Pijl-schaal (2D-biplot)")
266
+ run_btn = gr.Button("🔄 Update visualisaties")
267
+ gr.HTML('<div class="cta"><b>Klik hierna op: "🔄 Update visualisaties"</b> om alle grafieken te verversen.</div>')
268
  with gr.Group():
269
  gr.Markdown("### Animatie")
270
  animate_btn = gr.Button("▶ Animate PCA (PC1 → PC2)")
271
+ gr.HTML('<div class="cta"><b>Klik op: "▶ Animate PCA"</b> om de projectie stap-voor-stap te zien.</div>')
272
+ anim_plot = gr.Plot(label="Animatie van projectie", height=420)
273
  with gr.Group():
274
  gr.Markdown("### Downloads")
275
  dl_biplot = gr.DownloadButton("Download biplot (PNG)")
 
279
  with gr.Row():
280
  with gr.Column():
281
  gr.Markdown("### Biplot (2D, hover)")
282
+ plot_biplot = gr.Plot(height=520)
283
  with gr.Column():
284
  gr.Markdown("### 3D PCA (PC1–PC3)")
285
+ plot3d = gr.Plot(height=520)
286
  with gr.Row():
287
  with gr.Column():
288
  gr.Markdown("### Uitlegvariantie")
289
+ plot_expl = gr.Plot(height=360)
290
  with gr.Column():
291
+ gr.Markdown("### Top-features (PC1 / PC2)")
292
+ table = gr.Dataframe(headers=["Feature (PC1)", "Loading PC1", "Feature (PC2)", "Loading PC2"], row_count=6, height=360)
293
  with gr.Row():
294
  with gr.Column():
295
  gr.Markdown("### Histogram")
296
+ plot_hist = gr.Plot(height=360)
297
  with gr.Column():
298
  gr.Markdown("### Boxplot")
299
+ plot_box = gr.Plot(height=360)
300
  with gr.Row():
 
 
 
301
  with gr.Column():
302
  gr.Markdown("### Overzicht (gemiddelden & verdeling)")
303
  overview_tbl = gr.Dataframe(interactive=False)
304
+ with gr.Column():
305
+ gr.Markdown("### Samenvatting")
306
+ summary = gr.Markdown()
307
 
308
  inputs = [color_feat, n_components, standardize, arrow_scale]
309
  run_btn.click(fn=controller, inputs=inputs,
310
+ outputs=[plot_biplot, plot3d, plot_expl, table, overview_tbl, gr.Markdown(), summary])
311
  demo.load(fn=controller, inputs=inputs,
312
+ outputs=[plot_biplot, plot3d, plot_expl, table, overview_tbl, gr.Markdown(), summary])
313
+
314
+ animate_btn.click(fn=animate_pca, inputs=[color_feat], outputs=anim_plot)
315
+
316
+ dl_biplot.click(fn=export_biplot_png, inputs=[color_feat, arrow_scale], outputs=[dl_biplot])
317
+ dl_var.click(fn=export_variance_png, inputs=[], outputs=[dl_var])
 
 
 
 
 
 
 
318
 
319
  if __name__ == "__main__":
320
  demo.queue().launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False, show_api=False)