Marcel0123 committed on
Commit
2710efa
·
verified ·
1 Parent(s): 169930a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -306
app.py CHANGED
@@ -1,331 +1,66 @@
1
- import time
2
- import json
3
- import numpy as np
4
- import pandas as pd
5
- import plotly.graph_objects as go
6
-
7
- import gradio as gr
8
- from sklearn.preprocessing import StandardScaler
9
- from sklearn.decomposition import PCA
10
- from sklearn.linear_model import SGDClassifier, LogisticRegression
11
- from sklearn.ensemble import RandomForestClassifier
12
- from sklearn.svm import SVC
13
- from sklearn.model_selection import train_test_split
14
- from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
15
 
16
- # =========================
17
- # Ingebouwde dataset
18
- # =========================
19
- def load_builtin_dataset(n=1000, seed=42):
20
- rng = np.random.default_rng(seed)
21
- age = rng.integers(18, 75, size=n)
22
- gender = rng.choice([0, 1], size=n) # dummy feature
23
- sleep_quality = np.clip(rng.normal(6.5, 1.5, size=n), 1, 10)
24
- energy = np.clip(rng.normal(6.0, 1.7, size=n), 1, 10)
25
- anhedonia = np.clip(rng.normal(3.5, 1.8, size=n), 1, 10)
26
- stress = np.clip(rng.normal(4.5, 2.0, size=n), 1, 10)
27
- social_support = np.clip(rng.normal(6.0, 1.8, size=n), 1, 10)
28
- activity = np.clip(rng.normal(3.0 + 0.4*energy - 0.2*stress, 1.5, size=n), 0, 10)
29
- phq9 = np.clip(
30
- 0.8*anhedonia + 0.7*stress - 0.5*sleep_quality - 0.4*energy
31
- + rng.normal(0, 1.2, size=n) + 5, 0, 27
32
- )
33
- logit = (
34
- + 0.65*anhedonia + 0.55*stress
35
- - 0.45*sleep_quality - 0.40*energy
36
- - 0.30*social_support - 0.20*activity
37
- + 0.01*(age - 40) + 0.05*gender
38
- + rng.normal(0, 0.6, size=n)
39
  )
40
- logit = logit - np.median(logit)
41
- prob = 1 / (1 + np.exp(-logit))
42
- depressed = (prob > 0.5).astype(int)
43
- df = pd.DataFrame({
44
- "age": age, "gender": gender, "sleep_quality": sleep_quality, "energy": energy,
45
- "anhedonia": anhedonia, "stress": stress, "social_support": social_support,
46
- "activity": activity, "phq9": phq9, "depressed": depressed
47
- })
48
- return df, "depressed"
49
-
50
- # =========================
51
- # Helpers
52
- # =========================
53
- def ensure_min_classes(y):
54
- if len(np.unique(y)) < 2:
55
- raise gr.Error("Label heeft minder dan 2 unieke klassen.")
56
 
57
- def make_base_fig(coords, y, title):
58
- fig = go.Figure()
59
  labels = pd.Series(y).astype(str).values
60
- for lbl in np.unique(labels):
 
61
  mask = labels == lbl
 
62
  fig.add_trace(go.Scatter(
63
- x=coords[mask, 0], y=coords[mask, 1], mode="markers",
 
64
  name=f"Klasse {lbl}",
65
- marker=dict(size=8, opacity=0.85, line=dict(width=0.5)),
66
- hovertemplate="PC1: %{x:.2f}<br>PC2: %{y:.2f}<extra>"+f"Klasse {lbl}</extra>"
67
  ))
68
- fig.update_layout(
69
- title=title, xaxis_title="PC1", yaxis_title="PC2",
70
- legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
71
- margin=dict(l=10, r=10, t=60, b=10), template="plotly_dark", height=520
72
- )
73
  return fig
74
 
 
75
  def draw_decision_boundary(fig, clf2d, scaler2d, pca2d, X_scaled):
 
76
  coords = pca2d.transform(X_scaled)
77
  x_min, x_max = coords[:, 0].min() - 0.5, coords[:, 0].max() + 0.5
78
  y_min, y_max = coords[:, 1].min() - 0.5, coords[:, 1].max() + 0.5
79
- xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200), np.linspace(y_min, y_max, 200))
 
 
 
80
  grid_2d = np.c_[xx.ravel(), yy.ravel()]
81
  coords_grid_s = scaler2d.transform(grid_2d)
 
 
82
  if hasattr(clf2d, "predict_proba"):
83
  Z = clf2d.predict_proba(coords_grid_s)[:, -1]
84
  else:
85
  dec = clf2d.decision_function(coords_grid_s)
86
  Z = (dec - np.nanmin(dec)) / (np.nanmax(dec) - np.nanmin(dec) + 1e-9)
87
  Z = np.nan_to_num(Z, nan=0.5, posinf=1.0, neginf=0.0).reshape(xx.shape)
 
 
88
  fig.add_trace(go.Contour(
89
- x=np.linspace(x_min, x_max, 200), y=np.linspace(y_min, y_max, 200), z=Z,
90
- showscale=True, contours=dict(showlines=False), opacity=0.4, name="Beslissingsoppervlak"
 
 
 
 
 
 
91
  ))
92
  return fig
93
-
94
- def get_model(model_name, params):
95
- if model_name == "SGDClassifier (realtime)":
96
- return SGDClassifier(
97
- loss=params.get("sgd_loss", "log_loss"),
98
- alpha=params.get("sgd_alpha", 1e-4),
99
- learning_rate=params.get("sgd_lr", "optimal"),
100
- max_iter=1, random_state=42
101
- )
102
- elif model_name == "Logistic Regression":
103
- return LogisticRegression(max_iter=300)
104
- elif model_name == "Random Forest":
105
- return RandomForestClassifier(
106
- n_estimators=int(params.get("rf_n", 250)),
107
- max_depth=int(params.get("rf_depth", 8)) if params.get("rf_depth", None) else None,
108
- random_state=42
109
- )
110
- elif model_name == "SVM (RBF)":
111
- return SVC(probability=True, gamma="scale", C=params.get("svm_c", 1.0), random_state=42)
112
- return LogisticRegression(max_iter=300)
113
-
114
- # =========================
115
- # Train & stream
116
- # =========================
117
- def train_and_stream(test_size, model_name, params, epochs, pause_s):
118
- df, ycol = load_builtin_dataset()
119
- X = df.drop(columns=[ycol]).values
120
- y = df[ycol].values
121
- ensure_min_classes(y)
122
-
123
- X_train, X_test, y_train, y_test = train_test_split(
124
- X, y, test_size=test_size, random_state=42, stratify=y
125
- )
126
- scaler = StandardScaler().fit(X_train)
127
- X_train_s = scaler.transform(X_train)
128
- X_test_s = scaler.transform(X_test)
129
- pca = PCA(n_components=2, random_state=42).fit(X_train_s)
130
- coords_train = pca.transform(X_train_s)
131
- coords_test = pca.transform(X_test_s)
132
-
133
- clf = get_model(model_name, params)
134
-
135
- if model_name == "SGDClassifier (realtime)":
136
- classes = np.unique(y_train)
137
- for e in range(1, int(epochs) + 1):
138
- clf.partial_fit(X_train_s, y_train, classes=classes)
139
-
140
- y_pred = clf.predict(X_test_s)
141
- acc = accuracy_score(y_test, y_pred)
142
- f1 = f1_score(y_test, y_pred, average="weighted")
143
- try:
144
- y_proba = clf.predict_proba(X_test_s)[:, -1]
145
- auc = roc_auc_score(y_test, y_proba)
146
- except Exception:
147
- auc = np.nan
148
-
149
- scaler2d = StandardScaler().fit(coords_train)
150
- coords_train_s = scaler2d.transform(coords_train)
151
- clf2d = LogisticRegression(max_iter=200).fit(coords_train_s, y_train)
152
-
153
- fig_epoch = make_base_fig(coords_train, y_train, title=f"Epoch {e}/{epochs}")
154
- fig_epoch = draw_decision_boundary(fig_epoch, clf2d, scaler2d, pca, X_train_s)
155
- fig_epoch.add_trace(go.Scatter(
156
- x=coords_test[:, 0], y=coords_test[:, 1], mode="markers",
157
- name="Test set", marker=dict(size=10, symbol="circle-open", line=dict(width=2))
158
- ))
159
-
160
- metrics_md = (
161
- f"### Metrieken (testset)\n"
162
- f"**Accuracy:** {acc:.3f} \n"
163
- f"**F1 (gewogen):** {f1:.3f} \n"
164
- f"**ROC AUC:** {auc:.3f}\n"
165
- )
166
-
167
- # >>> Belangrijk: geef een **Figure**, geen dict
168
- yield fig_epoch, metrics_md
169
-
170
- if pause_s and float(pause_s) > 0:
171
- time.sleep(float(pause_s))
172
- return
173
- else:
174
- clf.fit(X_train_s, y_train)
175
- y_pred = clf.predict(X_test_s)
176
- acc = accuracy_score(y_test, y_pred)
177
- f1 = f1_score(y_test, y_pred, average="weighted")
178
- try:
179
- y_proba = clf.predict_proba(X_test_s)[:, -1]
180
- auc = roc_auc_score(y_test, y_proba)
181
- except Exception:
182
- auc = np.nan
183
-
184
- fig = make_base_fig(coords_train, y_train, title=f"Model: {model_name}")
185
- scaler2d = StandardScaler().fit(coords_train)
186
- coords_train_s = scaler2d.transform(coords_train)
187
- clf2d = LogisticRegression(max_iter=200).fit(coords_train_s, y_train)
188
- fig = draw_decision_boundary(fig, clf2d, scaler2d, pca, X_train_s)
189
- fig.add_trace(go.Scatter(
190
- x=coords_test[:, 0], y=coords_test[:, 1], mode="markers",
191
- name="Test set", marker=dict(size=10, symbol="circle-open", line=dict(width=2)),
192
- ))
193
-
194
- metrics_md = (
195
- f"### Metrieken (testset)\n"
196
- f"**Accuracy:** {acc:.3f} \n"
197
- f"**F1 (gewogen):** {f1:.3f} \n"
198
- f"**ROC AUC:** {auc:.3f}\n"
199
- )
200
- return fig, metrics_md
201
-
202
- def preview_dataset():
203
- df, _ = load_builtin_dataset()
204
- return df.head(10)
205
-
206
- def predict_row(model_name, params, row_index):
207
- df, ycol = load_builtin_dataset()
208
- Xdf = df.drop(columns=[ycol])
209
- y = df[ycol]
210
- idx = int(row_index)
211
- if idx < 0 or idx >= len(df):
212
- raise gr.Error("Ongeldige rij-index.")
213
- scaler = StandardScaler().fit(Xdf.values)
214
- Xs = scaler.transform(Xdf.values)
215
- clf = get_model(model_name, params)
216
- if isinstance(clf, SGDClassifier):
217
- clf = LogisticRegression(max_iter=300)
218
- clf.fit(Xs, y.values)
219
- x_row = Xs[idx].reshape(1, -1)
220
- pred = clf.predict(x_row)[0]
221
- proba = None
222
- if hasattr(clf, "predict_proba"):
223
- proba = clf.predict_proba(x_row)[0].max()
224
- pretty = json.dumps(df.iloc[[idx]].to_dict(orient="records")[0], ensure_ascii=False, indent=2)
225
- return f"### Gekozen patiënt (rij {idx})\n```json\n{pretty}\n```\n**Voorspelling:** {pred} \n" + (f"**Zekerheid (max. klasse-prob):** {proba:.3f}" if proba is not None else "")
226
-
227
- # =========================
228
- # UI
229
- # =========================
230
- DESCRIPTION = """
231
- # 🧠 Supervised Leren – Depressie (synthetisch, ingebouwd)
232
-
233
- - **Realtime** training (SGD) met **PCA-scatter** (elk bolletje = patiënt) en **beslissingsoppervlak**.
234
- - Eén pagina, strak en duidelijk. Geen uploads nodig.
235
- """
236
-
237
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple", neutral_hue="slate")) as demo:
238
- gr.HTML("""
239
- <div style="display:flex; gap:16px; align-items:center; padding:12px; background:linear-gradient(90deg,#1f1b2e,#0f172a); border-radius:16px;">
240
- <div style="font-size:42px;">🧪</div>
241
- <div>
242
- <div style="font-size:22px; font-weight:700; color:#E9D5FF;">Hugging Face Space – Realtime Trainen & Visualiseren</div>
243
- <div style="opacity:0.85; color:#E2E8F0;">Ingebouwde dataset, geen uploads nodig</div>
244
- </div>
245
- </div>
246
- """)
247
- gr.Markdown(DESCRIPTION)
248
-
249
- with gr.Row():
250
- with gr.Column(scale=1):
251
- ds_preview = gr.Dataframe(label="Voorbeeld van de data (eerste 10 rijen)")
252
- btn_preview = gr.Button("📄 Dataset preview vernieuwen", variant="secondary")
253
- with gr.Column(scale=1):
254
- model_choice = gr.Radio(
255
- label="Model",
256
- choices=["SGDClassifier (realtime)", "Logistic Regression", "Random Forest", "SVM (RBF)"],
257
- value="SGDClassifier (realtime)"
258
- )
259
- with gr.Accordion("Hyperparameters", open=False):
260
- sgd_loss = gr.Dropdown(["log_loss", "hinge", "modified_huber"], value="log_loss", label="SGD loss")
261
- sgd_alpha = gr.Slider(1e-6, 1e-2, value=1e-4, step=1e-6, label="SGD alpha (L2)")
262
- sgd_lr = gr.Dropdown(["optimal", "invscaling", "constant", "adaptive"], value="optimal", label="SGD learning rate")
263
-
264
- rf_n = gr.Slider(50, 500, value=250, step=10, label="RandomForest n_estimators")
265
- rf_depth = gr.Slider(0, 20, value=8, step=1, label="RandomForest max_depth (0 = None)")
266
-
267
- svm_c = gr.Slider(0.1, 5.0, value=1.0, step=0.1, label="SVM C")
268
-
269
- test_size = gr.Slider(0.1, 0.5, value=0.25, step=0.05, label="Testset proportie")
270
- with gr.Row():
271
- epochs = gr.Slider(1, 30, value=12, step=1, label="Epochs (alleen realtime SGD)")
272
- pause_s = gr.Slider(0.0, 1.0, value=0.15, step=0.05, label="Pauze per epoch (s)")
273
-
274
- btn_train = gr.Button("🚀 Train & Visualiseer", variant="primary")
275
-
276
- with gr.Row():
277
- fig_out = gr.Plot(label="Visualisatie (PCA 2D) met beslissingsoppervlak")
278
- metrics_out = gr.Markdown(label="Metrieken")
279
-
280
- with gr.Row():
281
- with gr.Column():
282
- row_index = gr.Slider(0, 999, value=0, step=1, label="Kies een patiënt (rij-index) voor voorspelling")
283
- btn_predict = gr.Button("🔮 Voorspel voor gekozen patiënt", variant="secondary")
284
- pred_md = gr.Markdown(label="Voorspelling")
285
-
286
- # Preload: preview en direct trainen
287
- demo.load(lambda: preview_dataset(), inputs=None, outputs=[ds_preview])
288
-
289
- def _proxy_train(test_size_v, model_name_v,
290
- sgd_loss_v, sgd_alpha_v, sgd_lr_v, rf_n_v, rf_depth_v, svm_c_v,
291
- epochs_v, pause_v):
292
- params = dict(
293
- sgd_loss=sgd_loss_v, sgd_alpha=float(sgd_alpha_v), sgd_lr=sgd_lr_v,
294
- rf_n=int(rf_n_v),
295
- rf_depth=None if int(rf_depth_v) == 0 else int(rf_depth_v),
296
- svm_c=float(svm_c_v),
297
- )
298
- yield from train_and_stream(test_size_v, model_name_v, params, epochs_v, pause_v)
299
-
300
- demo.load(
301
- _proxy_train,
302
- inputs=[test_size, model_choice, sgd_loss, sgd_alpha, sgd_lr, rf_n, rf_depth, svm_c, epochs, pause_s],
303
- outputs=[fig_out, metrics_out]
304
- )
305
-
306
- btn_preview.click(lambda: preview_dataset(), inputs=None, outputs=[ds_preview])
307
-
308
- btn_train.click(
309
- _proxy_train,
310
- inputs=[test_size, model_choice, sgd_loss, sgd_alpha, sgd_lr, rf_n, rf_depth, svm_c, epochs, pause_s],
311
- outputs=[fig_out, metrics_out]
312
- )
313
-
314
- btn_predict.click(
315
- lambda model_name_v, sgd_loss_v, sgd_alpha_v, sgd_lr_v, rf_n_v, rf_depth_v, svm_c_v, row_idx:
316
- predict_row(
317
- model_name_v,
318
- dict(
319
- sgd_loss=sgd_loss_v, sgd_alpha=float(sgd_alpha_v), sgd_lr=sgd_lr_v,
320
- rf_n=int(rf_n_v),
321
- rf_depth=None if int(rf_depth_v) == 0 else int(rf_depth_v),
322
- svm_c=float(svm_c_v),
323
- ),
324
- row_idx
325
- ),
326
- inputs=[model_choice, sgd_loss, sgd_alpha, sgd_lr, rf_n, rf_depth, svm_c, row_index],
327
- outputs=[pred_md]
328
- )
329
-
330
- if __name__ == "__main__":
331
- demo.launch()
 
1
import numpy as np
import pandas as pd
import plotly.graph_objects as go


def make_base_fig(coords, y, title):
    """Build a white-background scatter figure with one marker trace per class.

    Args:
        coords: 2-D array indexed as ``coords[mask, 0]`` / ``coords[mask, 1]``;
            column 0 is plotted on the x axis (labelled PC1) and column 1 on
            the y axis (labelled PC2).
        y: Labels, one per row of ``coords``. They are converted to strings
            before grouping, so classes are ordered by their string form.
        title: Text used as the figure title.

    Returns:
        The ``plotly.graph_objects.Figure`` containing the layout and one
        ``Scatter`` trace per distinct label.
    """
    # Bright colour palette, cycled when there are more classes than colours.
    palette = [
        "#2563eb", "#ef4444", "#10b981", "#f59e0b",
        "#a855f7", "#06b6d4", "#f97316", "#22c55e",
    ]
    fig = go.Figure()

    # Style the canvas first: white background, labelled axes, no dark theme.
    fig.update_layout(
        title=title,
        xaxis_title="PC1",
        yaxis_title="PC2",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        margin=dict(l=10, r=10, t=60, b=10),
        template=None,
        plot_bgcolor="#ffffff",
        paper_bgcolor="#ffffff",
        height=520,
    )

    # Then add the classes as markers on top of the canvas.
    labels = pd.Series(y).astype(str).values
    for i, lbl in enumerate(np.unique(labels)):
        mask = labels == lbl
        fig.add_trace(go.Scatter(
            x=coords[mask, 0],
            y=coords[mask, 1],
            mode="markers",
            name=f"Klasse {lbl}",
            marker=dict(
                size=10,
                opacity=0.95,
                color=palette[i % len(palette)],
                line=dict(width=1, color="#111"),
            ),
            hovertemplate="PC1: %{x:.2f}<br>PC2: %{y:.2f}<extra>" + f"Klasse {lbl}</extra>",
        ))
    return fig


def draw_decision_boundary(fig, clf2d, scaler2d, pca2d, X_scaled):
    """Overlay the score contours of a 2-D classifier on ``fig``.

    Args:
        fig: Figure to draw on; a ``Contour`` trace is added to it in place.
        clf2d: Classifier evaluated on the scaled grid. ``predict_proba`` is
            used when the object has it, otherwise ``decision_function``.
        scaler2d: Object whose ``transform`` is applied to the 2-D grid points
            before they are passed to ``clf2d``.
        pca2d: Object whose ``transform`` projects ``X_scaled`` to the 2-D
            coordinates that define the plotted extent.
        X_scaled: Data passed to ``pca2d.transform``; only used to determine
            the grid bounds.

    Returns:
        The same ``fig`` object, with the contour trace appended.
    """
    # Grid extent in the projected space, padded by 0.5 on every side.
    coords = pca2d.transform(X_scaled)
    x_min, x_max = coords[:, 0].min() - 0.5, coords[:, 0].max() + 0.5
    y_min, y_max = coords[:, 1].min() - 0.5, coords[:, 1].max() + 0.5

    # Build each axis once and reuse it for the mesh and for the contour
    # trace, so the two can never disagree about the grid.
    x_axis = np.linspace(x_min, x_max, 200)
    y_axis = np.linspace(y_min, y_max, 200)
    xx, yy = np.meshgrid(x_axis, y_axis)
    grid_2d = np.c_[xx.ravel(), yy.ravel()]
    coords_grid_s = scaler2d.transform(grid_2d)

    # Score every grid point for the contour.
    if hasattr(clf2d, "predict_proba"):
        # Last column of the probability matrix.
        Z = clf2d.predict_proba(coords_grid_s)[:, -1]
    else:
        # Min-max rescale the raw decision values; the epsilon avoids a
        # division by zero when all values are equal.
        dec = clf2d.decision_function(coords_grid_s)
        Z = (dec - np.nanmin(dec)) / (np.nanmax(dec) - np.nanmin(dec) + 1e-9)
    Z = np.nan_to_num(Z, nan=0.5, posinf=1.0, neginf=0.0).reshape(xx.shape)

    # Draw the contour as LINES (no fill) so the markers stay visible.
    fig.add_trace(go.Contour(
        x=x_axis,
        y=y_axis,
        z=Z,
        showscale=False,
        contours=dict(coloring="lines", showlines=True),
        line=dict(width=1),
        opacity=0.8,
        name="Beslissingslijnen",
    ))
    return fig