Marcel0123 committed on
Commit
597caf5
·
verified ·
1 Parent(s): 7f76c44

Upload 2 files

Browse files
Files changed (1) hide show
  1. app.py +336 -56
app.py CHANGED
@@ -8,7 +8,11 @@ from sklearn.preprocessing import StandardScaler
8
  from sklearn.decomposition import PCA
9
  import plotly.graph_objects as go
10
  import plotly.express as px
 
11
 
 
 
 
12
  FEATURE_LABELS = {
13
  "age": "Leeftijd",
14
  "sex": "Geslacht",
@@ -24,109 +28,385 @@ FEATURE_LABELS = {
24
  }
25
  LABEL_TO_KEY = {v: k for k, v in FEATURE_LABELS.items()}
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def load_diabetes_df():
28
  d = datasets.load_diabetes()
29
- X = pd.DataFrame(d.data, columns=d.feature_names)
30
  y = pd.Series(d.target, name="target")
31
  df = X.copy()
32
  df["target"] = y
33
  return df
34
 
35
- def compute_pca(df, n_components=10, standardize=True):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  feats = [c for c in df.columns if c != "target"]
37
  X = df[feats].values
38
  if standardize:
39
- scaler = StandardScaler()
40
  Xs = scaler.fit_transform(X)
41
  else:
42
  Xs = X
43
- pca = PCA(n_components=min(n_components, Xs.shape[1]))
44
  Z = pca.fit_transform(Xs)
45
  loadings = pca.components_.T
46
  expl = pca.explained_variance_ratio_
47
  return feats, Xs, Z, loadings, expl
48
 
49
- def build_biplot(df, Z, loadings, feats, color_key):
 
 
 
 
50
  hover_text = []
 
51
  for idx in range(len(df)):
52
- parts = [f"{FEATURE_LABELS.get(k,k)}: {df.iloc[idx][k]:.3f}" for k in df.columns]
53
  hover_text.append("<br>".join(parts))
 
54
  fig = go.Figure()
55
- fig.add_trace(go.Scatter(x=Z[:,0], y=Z[:,1], mode="markers",
56
- marker=dict(size=7, color=df[color_key].values),
57
- text=hover_text,
58
- hovertemplate="%{text}<extra></extra>"))
59
- # arrows
60
- scale=2.0
61
- for i,key in enumerate(feats):
62
- x = loadings[i,0]*scale; y = loadings[i,1]*scale
63
- fig.add_annotation(x=x, y=y, ax=0, ay=0, showarrow=True, arrowhead=3)
64
- fig.add_annotation(x=x*1.05, y=y*1.05, text=FEATURE_LABELS.get(key,key), showarrow=False, font=dict(size=10))
65
- fig.update_layout(title="PCA-biplot (2D, hover)", xaxis_title="PC1", yaxis_title="PC2", height=500)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  return fig
67
 
68
  def build_pca3d(Z3, color_vals):
69
- fig = go.Figure(data=[go.Scatter3d(x=Z3[:,0], y=Z3[:,1], z=Z3[:,2], mode="markers",
70
- marker=dict(size=4,color=color_vals,opacity=0.85))])
71
- fig.update_layout(title="PCA 3D", scene=dict(xaxis_title="PC1", yaxis_title="PC2", zaxis_title="PC3"), height=500)
 
 
 
 
 
 
 
 
72
  return fig
73
 
74
  def build_variance_plot(expl):
75
- fig = plt.figure()
76
  ax = fig.add_subplot(111)
77
- xs = np.arange(1,len(expl)+1)
78
- ax.bar(xs, expl); ax.plot(xs, np.cumsum(expl), marker="o")
79
- ax.set_xlabel("PC"); ax.set_ylabel("Explained variance ratio")
80
- fig.tight_layout()
 
 
 
 
 
81
  return fig
82
 
83
- def build_corr_heatmap(df):
84
  feats = [c for c in df.columns if c != "target"]
85
  corr = pd.DataFrame(df[feats]).corr()
86
  order = corr.abs().sum().sort_values(ascending=False).index.tolist()
87
  corr_sorted = corr.loc[order, order]
88
- fig = px.imshow(corr_sorted, color_continuous_scale="RdBu", origin="lower", zmin=-1, zmax=1)
89
- fig.update_layout(title="Correlatie-heatmap")
 
 
90
  return fig
91
 
92
- def build_hist_box(df, color_key):
93
- series = df[color_key]
94
  fig_hist = px.histogram(series, nbins=30, title=f"Histogram β€” {FEATURE_LABELS.get(color_key,color_key)}")
95
  fig_box = px.box(series, points="outliers", title=f"Boxplot β€” {FEATURE_LABELS.get(color_key,color_key)}")
96
  return fig_hist, fig_box
97
 
98
- def controller(color_label="BMI (Body Mass Index)", n_components=10, standardize=True):
 
 
 
99
  df = load_diabetes_df()
100
  feats, Xs, Z, loadings, expl = compute_pca(df, n_components, standardize)
101
- color_key = LABEL_TO_KEY[color_label]
102
- fig2d = build_biplot(df, Z, loadings, feats, color_key)
 
 
 
 
 
103
  if Z.shape[1] < 3:
104
- pca3 = PCA(n_components=3); Z3 = pca3.fit_transform(Xs)
 
105
  else:
106
- Z3 = Z[:,:3]
107
- fig3d = build_pca3d(Z3, df[color_key].values)
108
- fig_var = build_variance_plot(expl)
109
- fig_heat = build_corr_heatmap(df)
110
  fig_hist, fig_box = build_hist_box(df, color_key)
111
- return fig2d, fig3d, fig_var, fig_heat, fig_hist, fig_box
112
 
113
- with gr.Blocks() as demo:
114
- gr.Markdown("# PCA Dashboard β€” Diabetes")
115
- with gr.Row():
116
- color_feat = gr.Dropdown(choices=list(LABEL_TO_KEY.keys()), value="BMI (Body Mass Index)", label="Kleur op meting")
117
- n_components = gr.Slider(3,10,10,step=1,label="Aantal PCA-componenten")
118
- standardize = gr.Checkbox(value=True,label="Standaardiseer")
119
- run_btn = gr.Button("Update")
120
- with gr.Row():
121
- plot2d = gr.Plot(); plot3d = gr.Plot()
122
- with gr.Row():
123
- plot_var = gr.Plot(); plot_heat = gr.Plot()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  with gr.Row():
125
- plot_hist = gr.Plot(); plot_box = gr.Plot()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
- inputs=[color_feat,n_components,standardize]
128
- run_btn.click(fn=controller, inputs=inputs, outputs=[plot2d,plot3d,plot_var,plot_heat,plot_hist,plot_box])
129
- demo.load(fn=controller, inputs=inputs, outputs=[plot2d,plot3d,plot_var,plot_heat,plot_hist,plot_box])
 
 
 
 
130
 
131
- if __name__=="__main__":
132
- demo.queue().launch(server_name="0.0.0.0", server_port=7860)
 
8
  from sklearn.decomposition import PCA
9
  import plotly.graph_objects as go
10
  import plotly.express as px
11
+ import time
12
 
13
+ # ======================
14
+ # NL labels
15
+ # ======================
16
  FEATURE_LABELS = {
17
  "age": "Leeftijd",
18
  "sex": "Geslacht",
 
28
  }
29
  LABEL_TO_KEY = {v: k for k, v in FEATURE_LABELS.items()}
30
 
31
# Dutch introduction text shown at the top of the dashboard; it is passed
# verbatim to gr.Markdown in the UI section. Mis-encoded characters
# (mojibake) in the text have been restored to the intended UTF-8 characters.
MEDICAL_MD = """
### Medisch nut

**Wat zien we hier?**
Ik heb een bestaande, anonieme gezondheidsdataset gebruikt die speciaal beschikbaar is gemaakt voor onderzoek en studie. In deze gegevens staan metingen van een grote groep patiënten, zoals **bloedwaarden, BMI, cholesterol en bloedsuiker**.

Zo'n enorme berg cijfers is voor artsen en ziekenhuizen bijna niet in één keer te overzien. Het is gewoon te veel om met het blote oog patronen uit te halen.

**Daar komt kunstmatige intelligentie om de hoek kijken.**
Met deze techniek (PCA) kan de computer de data slim samenvatten en patronen zichtbaar maken. Dit programma dat ik heb ontworpen laat live zien hoe die samenvatting werkt.

- Elke punt is één patiënt.
- De kleur laat zien hoe hoog of laag een bepaalde meting is (standaard: BMI).
- De pijlen (in de 2D-biplot) laten zien welke metingen het meeste invloed hebben.
- Links bovenin kun je kiezen welke meting je als uitgangspunt wilt nemen.

**En wat heb je hieraan?**
In de praktijk gebruiken artsen en onderzoekers zo'n plot om patronen en verbanden te ontdekken. 👉 Het is dus niet alleen een mooi plaatje, maar echt een manier om grote hoeveelheden data sneller en slimmer te begrijpen.

Met AI kunnen we patronen vinden die je met het blote oog nooit zou zien. Dat maakt dit niet alleen een mooie visualisatie, maar ook een knap stukje technologie met échte waarde voor onderzoek en zorg.

**Speel zelf de onderzoeker!**
Doe alsof je een arts bent en kies links bovenin een waarde, bijvoorbeeld **cholesterol**, **leeftijd** of **geslacht**. Klik daarna op *Update visualisaties* en ontdek je eigen patronen in de data.
"""
55
+
56
+ # ======================
57
+ # Data helpers
58
+ # ======================
59
def load_diabetes_df():
    """Load the scikit-learn diabetes dataset into a single DataFrame.

    Returns:
        A DataFrame whose feature columns are named after
        ``feature_names`` of the loaded dataset, with the regression target
        appended as a ``target`` column.
    """
    bunch = datasets.load_diabetes()
    # The original author notes that these features are already standardized.
    frame = pd.DataFrame(bunch.data, columns=bunch.feature_names)
    frame["target"] = pd.Series(bunch.target, name="target")
    return frame
66
 
67
def compute_overview_table(df: pd.DataFrame):
    """Build a per-measurement summary table plus an explanatory note.

    For each core measurement the table holds the mean and the percentage of
    values above and below zero (zero is treated as the overall mean because
    the data is described as standardized).

    Args:
        df: DataFrame containing the columns bmi, bp and s1..s6.

    Returns:
        A ``(table, note)`` tuple: the summary DataFrame and a Markdown note
        (Dutch, user-facing) explaining how to read the values.
    """

    def _describe(key):
        # One table row for a measurement, or None when it has no values.
        values = df[key].dropna().values
        if values.size == 0:
            return None
        return {
            "Meting": FEATURE_LABELS.get(key, key),
            "Gemiddelde (gestandaardiseerd)": round(float(values.mean()), 3),
            "% boven gemiddelde": round(float((values > 0).mean() * 100.0), 1),
            "% onder gemiddelde": round(float((values < 0).mean() * 100.0), 1),
        }

    measurement_keys = ("bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6")
    described = (_describe(key) for key in measurement_keys)
    table = pd.DataFrame([row for row in described if row is not None])

    note = "".join((
        "Let op: waarden in deze dataset zijn **gestandaardiseerd**. ",
        "`0` betekent ongeveer het **algemene gemiddelde**. ",
        "Positief = hoger dan gemiddeld, negatief = lager dan gemiddeld.",
    ))
    return table, note
91
+
92
def compute_top_correlations(df: pd.DataFrame, top_n: int = 6):
    """Return the feature pairs with the strongest absolute correlation.

    Args:
        df: DataFrame of measurements; the ``target`` column is ignored.
        top_n: Number of pairs to keep.

    Returns:
        A DataFrame with the columns ``Combinatie``, ``Correlatie`` and
        ``Sterkte (|r|)``, sorted by descending absolute correlation and
        rounded to three decimals.
    """
    feature_names = [col for col in df.columns if col != "target"]
    corr_matrix = df[feature_names].corr()

    # Each unordered pair once: combine every feature only with later ones.
    pair_rows = [
        {
            "Combinatie": f"{FEATURE_LABELS.get(first,first)} ↔ {FEATURE_LABELS.get(second,second)}",
            "Correlatie": corr_matrix.loc[first, second],
        }
        for position, first in enumerate(feature_names)
        for second in feature_names[position + 1:]
    ]

    ranked = pd.DataFrame(pair_rows)
    ranked["Sterkte (|r|)"] = ranked["Correlatie"].abs()
    ranked = ranked.sort_values("Sterkte (|r|)", ascending=False)
    ranked = ranked.head(top_n).reset_index(drop=True)
    for column in ("Correlatie", "Sterkte (|r|)"):
        ranked[column] = ranked[column].round(3)
    return ranked[["Combinatie", "Correlatie", "Sterkte (|r|)"]]
110
+
111
+ # ======================
112
+ # PCA helpers
113
+ # ======================
114
def compute_pca(df: pd.DataFrame, n_components: int, standardize: bool):
    """Run PCA on every column of ``df`` except ``target``.

    Args:
        df: Input measurements.
        n_components: Requested number of components; converted to int and
            capped at the number of feature columns.
        standardize: When true, features are passed through StandardScaler
            before the PCA is fitted.

    Returns:
        ``(feats, Xs, Z, loadings, expl)``: feature names, the matrix the PCA
        was fitted on, the projected scores, the transposed component matrix
        and the explained-variance ratios.
    """
    feature_names = [col for col in df.columns if col != "target"]
    matrix = df[feature_names].values
    if standardize:
        matrix = StandardScaler(with_mean=True, with_std=True).fit_transform(matrix)

    component_count = min(int(n_components), matrix.shape[1])
    model = PCA(n_components=component_count)
    scores = model.fit_transform(matrix)
    return (
        feature_names,
        matrix,
        scores,
        model.components_.T,
        model.explained_variance_ratio_,
    )
127
 
128
+ # ======================
129
+ # Plot builders
130
+ # ======================
131
def build_biplot_plotly(df, Z, loadings, feats, color_key, arrow_scale=2.0):
    """Build the interactive 2D PCA biplot (scores plus loading arrows).

    Args:
        df: Measurements; must contain the hover fields listed below and
            ``color_key``.
        Z: PCA scores; columns 0 and 1 are plotted as PC1 and PC2.
        loadings: Loading matrix with one row per entry of ``feats``.
        feats: Feature keys, in the same order as the rows of ``loadings``.
        color_key: Column of ``df`` used to colour the points.
        arrow_scale: Multiplier applied to the loadings when drawing arrows.

    Returns:
        A Plotly ``go.Figure``.
    """
    hover_fields = ["bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6", "age", "sex", "target"]
    hover_labels = [FEATURE_LABELS.get(key, key) for key in hover_fields]
    # Walk the value matrix once instead of calling df.iloc[idx][k] for every
    # field of every row, which rebuilt a row Series on each access.
    hover_text = [
        "<br>".join(f"{label}: {value:.3f}" for label, value in zip(hover_labels, row))
        for row in df[hover_fields].to_numpy()
    ]

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=Z[:, 0], y=Z[:, 1],
        mode="markers",
        marker=dict(size=8, color=df[color_key].values),
        text=hover_text,
        hovertemplate="%{text}<extra></extra>",
    ))

    # Loading arrows for PC1/PC2: each arrow runs from the origin to the
    # scaled loading, with the feature label placed slightly beyond the tip.
    for row_index, key in enumerate(feats):
        tip_x = loadings[row_index, 0] * arrow_scale
        tip_y = loadings[row_index, 1] * arrow_scale
        fig.add_annotation(x=tip_x, y=tip_y, ax=0, ay=0,
                           xref="x", yref="y", axref="x", ayref="y",
                           showarrow=True, arrowhead=3)
        fig.add_annotation(x=tip_x * 1.05, y=tip_y * 1.05,
                           text=FEATURE_LABELS.get(key, key),
                           showarrow=False, font=dict(size=10))

    fig.update_layout(
        title="PCA-biplot (2D, hover voor details)",
        xaxis_title="PC1",
        yaxis_title="PC2",
        height=520,
        margin=dict(l=0, r=0, t=40, b=0),
    )
    return fig
166
+
167
def build_biplot_matplotlib(df, Z, loadings, feats, color_key, arrow_scale=2.0, point_size=32, alpha=0.85):
    """Build the static Matplotlib version of the biplot (used for PNG export).

    Args:
        df: Measurements; ``df[color_key]`` colours the points.
        Z: PCA scores; columns 0 and 1 are plotted.
        loadings: Loading matrix with one row per entry of ``feats``.
        feats: Feature keys matching the rows of ``loadings``.
        color_key: Column of ``df`` used for the colour scale.
        arrow_scale: Multiplier applied to the loadings when drawing arrows.
        point_size: Marker size passed to ``scatter``.
        alpha: Marker opacity.

    Returns:
        The Matplotlib figure. It is created through pyplot, so the caller is
        responsible for closing it.
    """
    fig = plt.figure(figsize=(7.8, 5.6))
    ax = fig.add_subplot(111)
    points = ax.scatter(Z[:, 0], Z[:, 1], c=df[color_key].values, s=point_size, alpha=alpha)

    # Figure-level calls instead of plt.colorbar / plt.tight_layout, so the
    # result never depends on which figure pyplot currently considers active.
    colorbar = fig.colorbar(points, ax=ax, pad=0.02)
    colorbar.set_label(f"Kleur: {FEATURE_LABELS.get(color_key, color_key)}")

    ax.set_xlabel("PC1")
    ax.set_ylabel("PC2")
    # Title previously contained a mis-encoded em dash.
    ax.set_title("PCA-biplot — punten + pijlen")

    for row_index, key in enumerate(feats):
        tip_x = loadings[row_index, 0] * arrow_scale
        tip_y = loadings[row_index, 1] * arrow_scale
        ax.arrow(0, 0, tip_x, tip_y, head_width=0.05, head_length=0.08,
                 fc="k", ec="k", length_includes_head=True)
        ax.text(tip_x * 1.08, tip_y * 1.08, FEATURE_LABELS.get(key, key),
                fontsize=9, ha="center", va="center")

    ax.axhline(0, color="grey", linewidth=0.6, linestyle=":")
    ax.axvline(0, color="grey", linewidth=0.6, linestyle=":")
    ax.grid(True, linestyle=":", linewidth=0.6)
    fig.tight_layout()
    return fig
186
 
187
def build_pca3d(Z3, color_vals):
    """Build the interactive 3D scatter of the first three components.

    Args:
        Z3: Array whose columns 0, 1 and 2 are plotted as PC1, PC2 and PC3.
        color_vals: One value per point, used as the marker colour.

    Returns:
        A Plotly ``go.Figure``.
    """
    cloud = go.Scatter3d(
        x=Z3[:, 0], y=Z3[:, 1], z=Z3[:, 2],
        mode="markers",
        marker=dict(size=4, color=color_vals, opacity=0.85),
    )
    fig = go.Figure(data=[cloud])
    fig.update_layout(
        # Title previously contained mis-encoded dash and middle-dot characters.
        title="PCA 3D — PC1 · PC2 · PC3 (sleep om te draaien)",
        scene=dict(xaxis_title="PC1", yaxis_title="PC2", zaxis_title="PC3"),
        margin=dict(l=0, r=0, t=40, b=0),
        height=520,
    )
    return fig
200
 
201
def build_variance_plot(expl):
    """Plot explained variance per component (bars) and cumulative (line).

    Args:
        expl: Sequence of explained-variance ratios, one per component.

    Returns:
        The Matplotlib figure. It is created through pyplot, so the caller is
        responsible for closing it.
    """
    fig = plt.figure(figsize=(7.8, 3.8))
    ax = fig.add_subplot(111)
    component_numbers = np.arange(1, len(expl) + 1)
    ax.bar(component_numbers, expl, width=0.8, align="center")
    ax.plot(component_numbers, np.cumsum(expl), marker="o")
    ax.set_xticks(component_numbers)
    ax.set_xlabel("Principal Component")
    ax.set_ylabel("Explained variance ratio")
    ax.set_title("Uitlegvariantie per component (balken) + cumulatief (lijn)")
    ax.grid(True, linestyle=":", linewidth=0.6)
    # Lay out this figure explicitly; plt.tight_layout() would act on
    # whichever figure pyplot currently considers active.
    fig.tight_layout()
    return fig
214
 
215
def build_corr_heatmap(df: pd.DataFrame):
    """Build a correlation heatmap of all columns except ``target``.

    Rows and columns are ordered by the sum of absolute correlations of each
    feature, strongest first.

    Returns:
        A Plotly figure produced by ``px.imshow``.
    """
    feature_names = [col for col in df.columns if col != "target"]
    corr_matrix = df[feature_names].corr()

    strength = corr_matrix.abs().sum()
    ordering = strength.sort_values(ascending=False).index.tolist()
    ordered = corr_matrix.loc[ordering, ordering]

    fig = px.imshow(
        ordered,
        text_auto=False,
        aspect="auto",
        color_continuous_scale="RdBu",
        origin="lower",
        zmin=-1,
        zmax=1,
    )
    fig.update_layout(
        title="Correlatie-heatmap (gesorteerd op sterkte)",
        height=520,
        margin=dict(l=0, r=0, t=40, b=0),
    )
    return fig
225
 
226
def build_hist_box(df: pd.DataFrame, color_key: str):
    """Build a histogram and a box plot for one measurement.

    Args:
        df: Measurements.
        color_key: Column to plot; missing values are dropped first.

    Returns:
        A ``(histogram, boxplot)`` tuple of Plotly figures.
    """
    series = df[color_key].dropna()
    label = FEATURE_LABELS.get(color_key, color_key)
    # Titles previously contained a mis-encoded em dash.
    fig_hist = px.histogram(series, nbins=30, title=f"Histogram — {label}")
    fig_box = px.box(series, points="outliers", title=f"Boxplot — {label}")
    return fig_hist, fig_box
231
 
232
+ # ======================
233
+ # Controllers
234
+ # ======================
235
def controller(color_label="BMI (Body Mass Index)", n_components=10, standardize=True, arrow_scale=2.0):
    """Recompute the PCA and build every output of the dashboard.

    Args:
        color_label: Display label of the measurement used for colouring;
            unknown labels fall back to ``bmi``.
        n_components: Number of PCA components requested.
        standardize: Whether features are standardized before the PCA.
        arrow_scale: Scale factor for the loading arrows in the 2D biplot.

    Returns:
        A 10-tuple in the order the UI outputs are wired: biplot, 3D plot,
        variance plot, top-loadings table, overview table, overview note,
        summary Markdown, correlation heatmap, histogram, box plot.
    """

    def _top_loadings(pc_index, feature_column, loading_column):
        # Six features with the largest absolute loading on one component.
        ranked = pd.DataFrame({
            feature_column: [FEATURE_LABELS.get(key, key) for key in feats],
            loading_column: loadings[:, pc_index],
            "_abs": np.abs(loadings[:, pc_index]),
        }).sort_values("_abs", ascending=False)
        return ranked[[feature_column, loading_column]].head(6).reset_index(drop=True)

    df = load_diabetes_df()
    feats, Xs, Z, loadings, expl = compute_pca(df, n_components, standardize)
    color_key = LABEL_TO_KEY.get(color_label, "bmi")
    color_label_nl = FEATURE_LABELS.get(color_key, color_key)

    fig_biplot = build_biplot_plotly(df, Z, loadings, feats, color_key, arrow_scale=arrow_scale)

    # The 3D view needs three components; refit when fewer were requested.
    Z3 = Z[:, :3] if Z.shape[1] >= 3 else PCA(n_components=3).fit_transform(Xs)
    fig3d = build_pca3d(Z3, df[color_key].values)
    fig_variance = build_variance_plot(expl)
    fig_heatmap = build_corr_heatmap(df)
    fig_hist, fig_box = build_hist_box(df, color_key)

    # Top-features table: PC1 ranking and PC2 ranking side by side.
    top_pc1 = _top_loadings(0, "Feature (PC1)", "PC1_loading")
    top_pc2 = _top_loadings(1, "Feature (PC2)", "PC2_loading")
    row_count = max(len(top_pc1), len(top_pc2))
    table = pd.concat(
        [top_pc1.reindex(range(row_count)), top_pc2.reindex(range(row_count))],
        axis=1,
    )

    overview_df, overview_note = compute_overview_table(df)

    summary_md = f"""
### Wat zie je hier?
- **Hover** over punten voor exacte waarden (BMI, bloeddruk, cholesterol, glucose, leeftijd, geslacht, etc.).
- **2D-biplot** met pijlen (belangrijkste metingen) en **3D-view** voor extra diepte.
- **Uitlegvariantieplot**: laat zien hoeveel variatie elke component uitlegt.
- **Correlatie-heatmap**: toont welke metingen samen bewegen (gesorteerd op sterkte).
- **Histogram + boxplot**: verdeling en spreiding van de gekozen meting ({color_label_nl}).
"""
    return (
        fig_biplot,
        fig3d,
        fig_variance,
        table,
        overview_df,
        overview_note,
        summary_md,
        fig_heatmap,
        fig_hist,
        fig_box,
    )
282
+
283
def animate_pca(color_label="BMI (Body Mass Index)", point_size=32, alpha=0.85, n_components=10, standardize=True, frames=40, pause=0.0):
    """Yield Matplotlib frames that build up the PCA projection step by step.

    During the first half of the animation the points spread out along PC1;
    during the second half PC2 is added.

    Args:
        color_label: Display label of the measurement used for colouring;
            unknown labels fall back to ``bmi``.
        point_size: Marker size passed to ``scatter``.
        alpha: Marker opacity.
        n_components: Number of PCA components requested.
        standardize: Whether features are standardized before the PCA.
        frames: Number of frames to produce.
        pause: Seconds to sleep after each frame (0 disables sleeping).

    Yields:
        One Matplotlib figure per frame. Each figure is deregistered from
        pyplot as soon as the consumer asks for the next frame (or abandons
        the generator), so frames no longer accumulate as open figures.
    """
    df = load_diabetes_df()
    _, _, Z, _, _ = compute_pca(df, n_components, standardize)
    color_key = LABEL_TO_KEY.get(color_label, "bmi")
    color_vals = df[color_key].values

    for frame_index in range(frames):
        progress = frame_index / max(1, frames - 1)
        weight_pc1 = min(1.0, progress * 2.0)          # ramps up over the first half
        weight_pc2 = max(0.0, (progress - 0.5) * 2.0)  # ramps up over the second half
        coords = np.column_stack([Z[:, 0] * weight_pc1, Z[:, 1] * weight_pc2])

        fig = plt.figure(figsize=(7.8, 5.6))
        try:
            ax = fig.add_subplot(111)
            ax.scatter(coords[:, 0], coords[:, 1], c=color_vals, s=point_size, alpha=alpha)
            ax.set_xlabel("PC1 (opbouw)")
            ax.set_ylabel("PC2 (opbouw)")
            # Titles previously contained mis-encoded dash and arrow characters.
            title = "PCA-projectie (animatie) — " + ("PC1 →" if weight_pc2 == 0 else "PC1 + PC2")
            ax.set_title(f"{title} — frame {frame_index + 1}/{frames}")
            ax.axhline(0, color="grey", linewidth=0.6, linestyle=":")
            ax.axvline(0, color="grey", linewidth=0.6, linestyle=":")
            ax.grid(True, linestyle=":", linewidth=0.6)
            fig.tight_layout()
            yield fig
        finally:
            # Runs when the consumer resumes or closes the generator; without
            # it every frame stayed registered with pyplot indefinitely.
            plt.close(fig)

        if pause > 0:
            time.sleep(pause)
306
+
307
def export_biplot_png(color_label="BMI (Body Mass Index)", arrow_scale=2.0, point_size=32, alpha=0.85, n_components=10, standardize=True):
    """Render the Matplotlib biplot to a PNG file and return its path.

    Args:
        color_label: Display label of the measurement used for colouring;
            unknown labels fall back to ``bmi``.
        arrow_scale: Scale factor for the loading arrows.
        point_size: Marker size.
        alpha: Marker opacity.
        n_components: Number of PCA components requested.
        standardize: Whether features are standardized before the PCA.

    Returns:
        Path of the written PNG, located in the system temporary directory.
    """
    import tempfile  # local import: only the export helpers need it

    df = load_diabetes_df()
    feats, _, Z, loadings, _ = compute_pca(df, n_components, standardize)
    color_key = LABEL_TO_KEY.get(color_label, "bmi")
    fig = build_biplot_matplotlib(df, Z, loadings, feats, color_key,
                                  arrow_scale=arrow_scale, point_size=point_size, alpha=alpha)

    # The previous hard-coded "/mnt/data/" directory is never created by this
    # program, and second-resolution timestamps could collide. A named
    # temporary file gives a unique path in a directory that exists.
    with tempfile.NamedTemporaryFile(prefix="biplot_", suffix=".png", delete=False) as tmp:
        path = tmp.name
    try:
        fig.savefig(path, dpi=150, bbox_inches="tight")
    finally:
        # Close the figure even when saving fails.
        plt.close(fig)
    return path
316
+
317
def export_variance_png(n_components=10, standardize=True):
    """Render the explained-variance plot to a PNG file and return its path.

    Args:
        n_components: Number of PCA components requested.
        standardize: Whether features are standardized before the PCA.

    Returns:
        Path of the written PNG, located in the system temporary directory.
    """
    import tempfile  # local import: only the export helpers need it

    df = load_diabetes_df()
    _, _, _, _, expl = compute_pca(df, n_components, standardize)
    fig = build_variance_plot(expl)

    # The previous hard-coded "/mnt/data/" directory is never created by this
    # program, and second-resolution timestamps could collide. A named
    # temporary file gives a unique path in a directory that exists.
    with tempfile.NamedTemporaryFile(prefix="variance_", suffix=".png", delete=False) as tmp:
        path = tmp.name
    try:
        fig.savefig(path, dpi=150, bbox_inches="tight")
    finally:
        # Close the figure even when saving fails.
        plt.close(fig)
    return path
325
+
326
+ # ======================
327
+ # UI
328
+ # ======================
329
# Gradio UI: settings/animation/downloads on the left, plots and tables on
# the right, followed by the event wiring.
# Mis-encoded characters in the title, the tip and the animate button label
# have been restored to the intended characters.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from the order of the statements; confirm it against the deployed layout.
with gr.Blocks(title="PCA Dashboard — Diabetes (compleet)") as demo:
    gr.HTML("""
<style>
.card {background:#fff; border:1px solid #e5e7eb; border-radius:12px; padding:14px; box-shadow: 0 1px 4px rgba(0,0,0,0.06);}
.callout {padding:12px 14px; border-left:4px solid #2563eb; background:#f1f5f9; border-radius:8px; margin: 8px 0 18px;}
.smallnote {font-size: 0.92em; opacity: 0.85;}
</style>
""")

    gr.Markdown("# PCA Dashboard — Diabetes (compleet)")
    gr.Markdown(MEDICAL_MD)

    with gr.Row():
        # Left column: controls.
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown("### Instellingen")
                color_choices = [FEATURE_LABELS[k] for k in ["bmi","bp","s1","s2","s3","s4","s5","s6","age","sex","target"]]
                color_feat = gr.Dropdown(choices=color_choices, value=FEATURE_LABELS["bmi"], label="Kleur op meting")
                n_components = gr.Slider(3, 10, value=10, step=1, label="Aantal PCA-componenten")
                standardize = gr.Checkbox(value=True, label="Standaardiseer metingen (aanbevolen)")
                arrow_scale = gr.Slider(0.5, 5.0, value=2.0, step=0.1, label="Pijl-schaal (2D-biplot)")
                run_btn = gr.Button("Update visualisaties")
                gr.HTML('<div class="callout smallnote">💡 <b>Tip:</b> Kies links een meting (bijv. BMI of cholesterol) en klik daarna op <b>Update visualisaties</b>.</div>')
            with gr.Group():
                gr.Markdown("### Animatie")
                animate_btn = gr.Button("▶ Animate PCA (PC1 → PC2)")
                anim_plot = gr.Plot(label="Animatie van projectie")
            with gr.Group():
                gr.Markdown("### Downloads")
                dl_biplot = gr.DownloadButton("Download biplot (PNG)")
                dl_var = gr.DownloadButton("Download variatieplot (PNG)")

        # Right column: plots and tables.
        with gr.Column(scale=2):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Biplot (2D, hover)")
                    plot_biplot = gr.Plot()
                with gr.Column():
                    gr.Markdown("### 3D PCA (PC1–PC3)")
                    plot3d = gr.Plot()
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Uitlegvariantie")
                    plot_expl = gr.Plot()
                with gr.Column():
                    gr.Markdown("### Correlatie-heatmap")
                    plot_heat = gr.Plot()
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Histogram")
                    plot_hist = gr.Plot()
                with gr.Column():
                    gr.Markdown("### Boxplot")
                    plot_box = gr.Plot()
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Top-features (PC1 / PC2)")
                    table = gr.Dataframe(headers=["Feature (PC1)", "Loading PC1", "Feature (PC2)", "Loading PC2"], row_count=6)
                with gr.Column():
                    gr.Markdown("### Overzicht (gemiddelden & verdeling)")
                    overview_tbl = gr.Dataframe(interactive=False)
            summary = gr.Markdown()
            overview_note_md = gr.Markdown()

    # The same controller refreshes everything on button click and on page
    # load; the output order must match the tuple returned by controller().
    inputs = [color_feat, n_components, standardize, arrow_scale]
    dashboard_outputs = [plot_biplot, plot3d, plot_expl, table, overview_tbl,
                         overview_note_md, summary, plot_heat, plot_hist, plot_box]
    run_btn.click(fn=controller, inputs=inputs, outputs=dashboard_outputs)
    demo.load(fn=controller, inputs=inputs, outputs=dashboard_outputs)

    # Only the colour selection is passed; animate_pca uses its defaults for
    # the remaining parameters.
    animate_btn.click(fn=animate_pca,
                      inputs=[color_feat],
                      outputs=anim_plot)

    # Downloads (PNG): each handler returns a file path that is fed back into
    # its own DownloadButton.
    dl_biplot.click(fn=export_biplot_png,
                    inputs=[color_feat, arrow_scale],
                    outputs=[dl_biplot])
    dl_var.click(fn=export_variance_png,
                 inputs=[],
                 outputs=[dl_var])
410
 
411
if __name__ == "__main__":
    # Script entry point: enable the request queue, then serve on all
    # interfaces at port 7860 with SSR mode and the API page switched off.
    queued_app = demo.queue()
    queued_app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False,
        show_api=False,
    )