Marcel0123 committed on
Commit
9f59116
·
verified ·
1 Parent(s): 8c252bd

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +48 -16
  2. requirements.txt +1 -0
app.py CHANGED
@@ -6,7 +6,8 @@ import matplotlib.pyplot as plt
6
  from sklearn import datasets
7
  from sklearn.preprocessing import StandardScaler
8
  from sklearn.decomposition import PCA
9
- import os, time
 
10
 
11
  FEATURE_LABELS = {
12
  "age": "Leeftijd",
@@ -109,7 +110,7 @@ def compute_pca(df: pd.DataFrame, n_components: int, standardize: bool):
109
  Z = pca.fit_transform(Xs)
110
  loadings = pca.components_.T
111
  expl = pca.explained_variance_ratio_
112
- return feats, Z, loadings, expl
113
 
114
  def build_biplot(Z, loadings, feats, color_vals, arrow_scale, point_size, alpha, color_label_nl):
115
  fig = plt.figure(figsize=(7.8, 5.6))
@@ -144,12 +145,27 @@ def build_variance_plot(expl):
144
  plt.tight_layout()
145
  return fig
146
 
147
- def pca_biplot(color_label="BMI (Body Mass Index)", arrow_scale=2.0, point_size=32, alpha=0.85, n_components=10, standardize=True):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  df = load_diabetes_df()
149
  overview_df, overview_note = compute_overview_table(df)
150
  corr_tbl = compute_top_correlations(df)
151
 
152
- feats, Z, loadings, expl = compute_pca(df, n_components, standardize)
153
  color_key = LABEL_TO_KEY.get(color_label, "bmi")
154
  color_label_nl = FEATURE_LABELS.get(color_key, color_key)
155
  color_vals = df[color_key].values
@@ -179,21 +195,32 @@ def pca_biplot(color_label="BMI (Body Mass Index)", arrow_scale=2.0, point_size=
179
  - **Pijlen** = bijdrage van **metingen** aan de richting van **PC1/PC2**. **Langere pijlen** wegen zwaarder.
180
  - **Balkgrafiek** = per component hoeveel variatie hij uitlegt; **lijn** = cumulatief.
181
  """
182
- return fig1, fig2, table, summary_md, overview_df, overview_note, corr_tbl
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
  def animate_pca(color_label="BMI (Body Mass Index)", point_size=32, alpha=0.85, n_components=10, standardize=True, frames=40, pause=0.0):
185
  df = load_diabetes_df()
186
- feats, Z, loadings, expl = compute_pca(df, n_components, standardize)
187
-
188
  color_key = LABEL_TO_KEY.get(color_label, "bmi")
189
  color_vals = df[color_key].values
190
-
191
  for i in range(frames):
192
- t = i / max(1, frames-1) # 0..1
193
  w1 = min(1.0, t * 2.0)
194
  w2 = max(0.0, (t - 0.5) * 2.0)
195
  coords = np.column_stack([Z[:, 0] * w1, Z[:, 1] * w2])
196
-
197
  fig = plt.figure(figsize=(7.8, 5.6))
198
  ax = fig.add_subplot(111)
199
  ax.scatter(coords[:, 0], coords[:, 1], c=color_vals, s=point_size, alpha=alpha)
@@ -210,7 +237,7 @@ def animate_pca(color_label="BMI (Body Mass Index)", point_size=32, alpha=0.85,
210
 
211
  def export_biplot_png(color_label="BMI (Body Mass Index)", arrow_scale=2.0, point_size=32, alpha=0.85, n_components=10, standardize=True):
212
  df = load_diabetes_df()
213
- feats, Z, loadings, expl = compute_pca(df, n_components, standardize)
214
  color_key = LABEL_TO_KEY.get(color_label, "bmi")
215
  color_label_nl = FEATURE_LABELS.get(color_key, color_key)
216
  color_vals = df[color_key].values
@@ -222,7 +249,7 @@ def export_biplot_png(color_label="BMI (Body Mass Index)", arrow_scale=2.0, poin
222
 
223
  def export_variance_png(n_components=10, standardize=True):
224
  df = load_diabetes_df()
225
- feats, Z, loadings, expl = compute_pca(df, n_components, standardize)
226
  fig = build_variance_plot(expl)
227
  path = f"/mnt/data/variance_{int(time.time())}.png"
228
  fig.savefig(path, dpi=150, bbox_inches="tight")
@@ -252,6 +279,7 @@ with gr.Blocks(title="PCA Biplot — Diabetes (Dashboard)") as demo:
252
  alpha = gr.Slider(0.2, 1.0, value=0.85, step=0.05, label="Transparantie (punten)")
253
  n_components = gr.Slider(2, 10, value=10, step=1, label="Aantal PCA-componenten")
254
  standardize = gr.Checkbox(value=True, label="Standaardiseer metingen (aanbevolen)")
 
255
  run_btn = gr.Button("Update visualisaties")
256
  gr.HTML('<div class="callout smallnote">💡 <b>Tip:</b> Kies links een meting (bijv. BMI of cholesterol) en klik daarna op <b>Update visualisaties</b>.</div>')
257
  with gr.Group():
@@ -269,7 +297,7 @@ with gr.Blocks(title="PCA Biplot — Diabetes (Dashboard)") as demo:
269
  with gr.Column(scale=2):
270
  with gr.Row():
271
  with gr.Column():
272
- gr.Markdown("### Biplot")
273
  plot_biplot = gr.Plot()
274
  with gr.Column():
275
  gr.Markdown("### Uitlegvariantie")
@@ -289,12 +317,16 @@ with gr.Blocks(title="PCA Biplot — Diabetes (Dashboard)") as demo:
289
  with gr.Column():
290
  gr.Markdown("### Top correlaties (features)")
291
  topcorr_tbl = gr.Dataframe(interactive=False)
 
 
 
 
292
 
293
- inputs = [color_feat, arrow_scale, point_size, alpha, n_components, standardize]
294
  run_btn.click(fn=pca_biplot, inputs=inputs,
295
- outputs=[plot_biplot, plot_expl, table, summary, overview_tbl, overview_note_md, topcorr_tbl])
296
  demo.load(fn=pca_biplot, inputs=inputs,
297
- outputs=[plot_biplot, plot_expl, table, summary, overview_tbl, overview_note_md, topcorr_tbl])
298
 
299
  animate_btn.click(fn=animate_pca,
300
  inputs=[color_feat, point_size, alpha, n_components, standardize],
 
6
  from sklearn import datasets
7
  from sklearn.preprocessing import StandardScaler
8
  from sklearn.decomposition import PCA
9
+ import plotly.graph_objects as go
10
+ import time
11
 
12
  FEATURE_LABELS = {
13
  "age": "Leeftijd",
 
110
  Z = pca.fit_transform(Xs)
111
  loadings = pca.components_.T
112
  expl = pca.explained_variance_ratio_
113
+ return feats, Xs, Z, loadings, expl
114
 
115
  def build_biplot(Z, loadings, feats, color_vals, arrow_scale, point_size, alpha, color_label_nl):
116
  fig = plt.figure(figsize=(7.8, 5.6))
 
145
  plt.tight_layout()
146
  return fig
147
 
148
+ def build_pca3d(Z3, color_vals, color_label_nl, point_size, alpha):
149
+ # Plotly 3D scatter for real rotation/drag
150
+ fig = go.Figure(data=[go.Scatter3d(
151
+ x=Z3[:, 0], y=Z3[:, 1], z=Z3[:, 2],
152
+ mode="markers",
153
+ marker=dict(size=max(2, int(point_size/2)), color=color_vals, opacity=alpha)
154
+ )])
155
+ fig.update_layout(
156
+ title="PCA 3D — PC1 · PC2 · PC3",
157
+ scene=dict(xaxis_title="PC1", yaxis_title="PC2", zaxis_title="PC3"),
158
+ margin=dict(l=0, r=0, t=40, b=0),
159
+ height=520
160
+ )
161
+ return fig
162
+
163
+ def pca_biplot(color_label="BMI (Body Mass Index)", arrow_scale=2.0, point_size=32, alpha=0.85, n_components=10, standardize=True, show_3d=False):
164
  df = load_diabetes_df()
165
  overview_df, overview_note = compute_overview_table(df)
166
  corr_tbl = compute_top_correlations(df)
167
 
168
+ feats, Xs, Z, loadings, expl = compute_pca(df, n_components, standardize)
169
  color_key = LABEL_TO_KEY.get(color_label, "bmi")
170
  color_label_nl = FEATURE_LABELS.get(color_key, color_key)
171
  color_vals = df[color_key].values
 
195
  - **Pijlen** = bijdrage van **metingen** aan de richting van **PC1/PC2**. **Langere pijlen** wegen zwaarder.
196
  - **Balkgrafiek** = per component hoeveel variatie hij uitlegt; **lijn** = cumulatief.
197
  """
198
+
199
+ # 3D
200
+ if show_3d:
201
+ # Zorg dat we minstens 3 componenten hebben
202
+ if Z.shape[1] < 3:
203
+ pca3 = PCA(n_components=3)
204
+ Z3 = pca3.fit_transform(Xs)
205
+ else:
206
+ Z3 = Z[:, :3]
207
+ fig3 = build_pca3d(Z3, color_vals, color_label_nl, point_size, alpha)
208
+ pca3d_out = gr.update(value=fig3, visible=True)
209
+ else:
210
+ pca3d_out = gr.update(value=None, visible=False)
211
+
212
+ return fig1, fig2, table, summary_md, overview_df, overview_note, corr_tbl, pca3d_out
213
 
214
  def animate_pca(color_label="BMI (Body Mass Index)", point_size=32, alpha=0.85, n_components=10, standardize=True, frames=40, pause=0.0):
215
  df = load_diabetes_df()
216
+ feats, Xs, Z, loadings, expl = compute_pca(df, n_components, standardize)
 
217
  color_key = LABEL_TO_KEY.get(color_label, "bmi")
218
  color_vals = df[color_key].values
 
219
  for i in range(frames):
220
+ t = i / max(1, frames-1)
221
  w1 = min(1.0, t * 2.0)
222
  w2 = max(0.0, (t - 0.5) * 2.0)
223
  coords = np.column_stack([Z[:, 0] * w1, Z[:, 1] * w2])
 
224
  fig = plt.figure(figsize=(7.8, 5.6))
225
  ax = fig.add_subplot(111)
226
  ax.scatter(coords[:, 0], coords[:, 1], c=color_vals, s=point_size, alpha=alpha)
 
237
 
238
  def export_biplot_png(color_label="BMI (Body Mass Index)", arrow_scale=2.0, point_size=32, alpha=0.85, n_components=10, standardize=True):
239
  df = load_diabetes_df()
240
+ feats, Xs, Z, loadings, expl = compute_pca(df, n_components, standardize)
241
  color_key = LABEL_TO_KEY.get(color_label, "bmi")
242
  color_label_nl = FEATURE_LABELS.get(color_key, color_key)
243
  color_vals = df[color_key].values
 
249
 
250
  def export_variance_png(n_components=10, standardize=True):
251
  df = load_diabetes_df()
252
+ feats, Xs, Z, loadings, expl = compute_pca(df, n_components, standardize)
253
  fig = build_variance_plot(expl)
254
  path = f"/mnt/data/variance_{int(time.time())}.png"
255
  fig.savefig(path, dpi=150, bbox_inches="tight")
 
279
  alpha = gr.Slider(0.2, 1.0, value=0.85, step=0.05, label="Transparantie (punten)")
280
  n_components = gr.Slider(2, 10, value=10, step=1, label="Aantal PCA-componenten")
281
  standardize = gr.Checkbox(value=True, label="Standaardiseer metingen (aanbevolen)")
282
+ show_3d = gr.Checkbox(value=True, label="Toon 3D PCA (PC1–PC3)")
283
  run_btn = gr.Button("Update visualisaties")
284
  gr.HTML('<div class="callout smallnote">💡 <b>Tip:</b> Kies links een meting (bijv. BMI of cholesterol) en klik daarna op <b>Update visualisaties</b>.</div>')
285
  with gr.Group():
 
297
  with gr.Column(scale=2):
298
  with gr.Row():
299
  with gr.Column():
300
+ gr.Markdown("### Biplot (2D)")
301
  plot_biplot = gr.Plot()
302
  with gr.Column():
303
  gr.Markdown("### Uitlegvariantie")
 
317
  with gr.Column():
318
  gr.Markdown("### Top correlaties (features)")
319
  topcorr_tbl = gr.Dataframe(interactive=False)
320
+ with gr.Row():
321
+ with gr.Column():
322
+ gr.Markdown("### 3D PCA (PC1–PC3 — sleep om te draaien)")
323
+ plot3d = gr.Plot(visible=True)
324
 
325
+ inputs = [color_feat, arrow_scale, point_size, alpha, n_components, standardize, show_3d]
326
  run_btn.click(fn=pca_biplot, inputs=inputs,
327
+ outputs=[plot_biplot, plot_expl, table, summary, overview_tbl, overview_note_md, topcorr_tbl, plot3d])
328
  demo.load(fn=pca_biplot, inputs=inputs,
329
+ outputs=[plot_biplot, plot_expl, table, summary, overview_tbl, overview_note_md, topcorr_tbl, plot3d])
330
 
331
  animate_btn.click(fn=animate_pca,
332
  inputs=[color_feat, point_size, alpha, n_components, standardize],
requirements.txt CHANGED
@@ -3,3 +3,4 @@ matplotlib>=3.7.0
3
  numpy>=1.23.0
4
  scikit-learn>=1.2.0
5
  pandas>=1.5.0
 
 
3
  numpy>=1.23.0
4
  scikit-learn>=1.2.0
5
  pandas>=1.5.0
6
+ plotly>=5.15.0