|
|
import time |
|
|
import json |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import plotly.graph_objects as go |
|
|
|
|
|
import gradio as gr |
|
|
from sklearn.preprocessing import StandardScaler |
|
|
from sklearn.decomposition import PCA |
|
|
from sklearn.linear_model import SGDClassifier, LogisticRegression |
|
|
from sklearn.ensemble import RandomForestClassifier |
|
|
from sklearn.svm import SVC |
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score |
|
|
|
|
|
|
|
|
|
|
|
def load_builtin_dataset(n=1000, seed=42):
    """Generate the built-in synthetic depression-screening dataset.

    Returns a ``(DataFrame, label_column_name)`` tuple. All draws come from
    one seeded generator, so repeated calls with the same arguments are
    byte-for-byte identical.
    """
    rng = np.random.default_rng(seed)

    # Demographics.
    age = rng.integers(18, 75, size=n)
    gender = rng.choice([0, 1], size=n)

    # Self-reported questionnaire scores, clipped to the 1-10 scale.
    sleep_quality = np.clip(rng.normal(6.5, 1.5, size=n), 1, 10)
    energy = np.clip(rng.normal(6.0, 1.7, size=n), 1, 10)
    anhedonia = np.clip(rng.normal(3.5, 1.8, size=n), 1, 10)
    stress = np.clip(rng.normal(4.5, 2.0, size=n), 1, 10)
    social_support = np.clip(rng.normal(6.0, 1.8, size=n), 1, 10)

    # Activity level depends on energy and stress.
    activity = np.clip(
        rng.normal(3.0 + 0.4 * energy - 0.2 * stress, 1.5, size=n), 0, 10
    )

    # Noisy PHQ-9-like severity score on the usual 0-27 scale.
    phq9_raw = (
        0.8 * anhedonia + 0.7 * stress - 0.5 * sleep_quality - 0.4 * energy
        + rng.normal(0, 1.2, size=n) + 5
    )
    phq9 = np.clip(phq9_raw, 0, 27)

    # Latent log-odds of the positive ("depressed") class.
    logit = (
        0.65 * anhedonia + 0.55 * stress
        - 0.45 * sleep_quality - 0.40 * energy
        - 0.30 * social_support - 0.20 * activity
        + 0.01 * (age - 40) + 0.05 * gender
        + rng.normal(0, 0.6, size=n)
    )
    # Center on the median so the two classes come out roughly balanced.
    logit = logit - np.median(logit)
    prob = 1 / (1 + np.exp(-logit))
    depressed = (prob > 0.5).astype(int)

    columns = {
        "age": age,
        "gender": gender,
        "sleep_quality": sleep_quality,
        "energy": energy,
        "anhedonia": anhedonia,
        "stress": stress,
        "social_support": social_support,
        "activity": activity,
        "phq9": phq9,
        "depressed": depressed,
    }
    return pd.DataFrame(columns), "depressed"
|
|
|
|
|
|
|
|
|
|
|
def ensure_min_classes(y):
    """Raise a user-facing Gradio error unless *y* holds at least 2 classes."""
    n_classes = np.unique(y).size
    if n_classes < 2:
        raise gr.Error("Label heeft minder dan 2 unieke klassen.")
|
|
|
|
|
def make_base_fig(coords, y, title):
    """Build a white 2-D scatter figure with one trace per class label.

    ``coords`` is an (n, 2) array of PC1/PC2 positions and ``y`` holds one
    class label per row (any dtype — labels are rendered via their string
    form). Colors cycle through a fixed palette.
    """
    class_colors = ["#2563eb", "#ef4444", "#10b981", "#f59e0b",
                    "#a855f7", "#06b6d4", "#f97316", "#22c55e"]
    fig = go.Figure()
    fig.update_layout(
        title=title,
        xaxis_title="PC1",
        yaxis_title="PC2",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        margin=dict(l=10, r=10, t=60, b=10),
        template=None,
        plot_bgcolor="#ffffff",
        paper_bgcolor="#ffffff",
        height=520,
    )
    str_labels = pd.Series(y).astype(str).values
    for idx, label in enumerate(np.unique(str_labels)):
        selected = str_labels == label
        fig.add_trace(go.Scatter(
            x=coords[selected, 0],
            y=coords[selected, 1],
            mode="markers",
            name=f"Klasse {label}",
            marker=dict(
                size=10,
                opacity=0.95,
                color=class_colors[idx % len(class_colors)],
                line=dict(width=1, color="#111"),
            ),
            hovertemplate="PC1: %{x:.2f}<br>PC2: %{y:.2f}<extra>" + f"Klasse {label}</extra>",
        ))
    return fig
|
|
|
|
|
def draw_decision_boundary(fig, clf2d, scaler2d, pca2d, X_scaled):
    """Overlay the decision contour lines of *clf2d* onto an existing figure.

    ``clf2d`` was trained on scaler2d-standardized 2-D PCA coordinates, so
    every grid point is passed through ``scaler2d`` before scoring. The
    grid extent is taken from ``X_scaled`` projected through ``pca2d``,
    padded by half a unit on each side.
    """
    projected = pca2d.transform(X_scaled)
    pad = 0.5
    xs = np.linspace(projected[:, 0].min() - pad, projected[:, 0].max() + pad, 200)
    ys = np.linspace(projected[:, 1].min() - pad, projected[:, 1].max() + pad, 200)
    xx, yy = np.meshgrid(xs, ys)
    grid = scaler2d.transform(np.column_stack([xx.ravel(), yy.ravel()]))

    if hasattr(clf2d, "predict_proba"):
        # Probability of the last (positive) class.
        scores = clf2d.predict_proba(grid)[:, -1]
    else:
        # No probabilities: min-max normalize decision scores into [0, 1].
        raw = clf2d.decision_function(grid)
        scores = (raw - np.nanmin(raw)) / (np.nanmax(raw) - np.nanmin(raw) + 1e-9)

    z = np.nan_to_num(scores, nan=0.5, posinf=1.0, neginf=0.0).reshape(xx.shape)
    fig.add_trace(go.Contour(
        x=xs,
        y=ys,
        z=z,
        showscale=False,
        contours=dict(coloring="lines", showlines=True),
        line=dict(width=1),
        opacity=0.8,
        name="Beslissingslijnen",
    ))
    return fig
|
|
|
|
|
def get_model(model_name, params):
    """Instantiate the classifier selected in the UI.

    Parameters
    ----------
    model_name : str
        One of the Radio choices in the UI; unrecognized names fall back
        to logistic regression.
    params : dict
        Hyperparameters keyed by model prefix (``sgd_*``, ``rf_*``,
        ``svm_c``). A falsy ``rf_depth`` (None or 0) means an unlimited
        tree depth.
    """
    if model_name == "SGDClassifier (realtime)":
        # max_iter=1 because training is driven epoch-by-epoch through
        # partial_fit in train_and_stream, not by a single fit() call.
        return SGDClassifier(
            loss=params.get("sgd_loss", "log_loss"),
            alpha=params.get("sgd_alpha", 1e-4),
            learning_rate=params.get("sgd_lr", "optimal"),
            max_iter=1,
            random_state=42,
        )
    if model_name == "Logistic Regression":
        return LogisticRegression(max_iter=300)
    if model_name == "Random Forest":
        # Bug fix: the old code re-read the key with a None default for the
        # truthiness test, so a *missing* rf_depth produced max_depth=None
        # instead of the intended default of 8. Read the key exactly once.
        depth = params.get("rf_depth", 8)
        return RandomForestClassifier(
            n_estimators=int(params.get("rf_n", 250)),
            max_depth=int(depth) if depth else None,
            random_state=42,
        )
    if model_name == "SVM (RBF)":
        return SVC(probability=True, gamma="scale", C=params.get("svm_c", 1.0), random_state=42)
    # Fallback for unrecognized model names.
    return LogisticRegression(max_iter=300)
|
|
|
|
|
|
|
|
|
|
|
def _test_metrics(clf, X_test_s, y_test):
    """Return (accuracy, weighted F1, ROC AUC or NaN) on the test split."""
    y_pred = clf.predict(X_test_s)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average="weighted")
    try:
        y_proba = clf.predict_proba(X_test_s)[:, -1]
        auc = roc_auc_score(y_test, y_proba)
    except Exception:
        # Models without predict_proba (e.g. hinge-loss SGD): no AUC.
        auc = np.nan
    return acc, f1, auc


def _metrics_markdown(acc, f1, auc):
    """Format the test-set metrics as the Markdown shown next to the plot."""
    return (
        f"### Metrieken (testset)\n"
        f"**Accuracy:** {acc:.3f} \n"
        f"**F1 (gewogen):** {f1:.3f} \n"
        f"**ROC AUC:** {auc:.3f}\n"
    )


def _fit_surrogate_2d(coords_train, y_train):
    """Fit the 2-D surrogate classifier whose decision lines are drawn.

    The plotted boundary comes from a logistic regression trained on the
    standardized PCA coordinates — NOT from the main classifier, which
    lives in full feature space.
    """
    scaler2d = StandardScaler().fit(coords_train)
    clf2d = LogisticRegression(max_iter=200).fit(
        scaler2d.transform(coords_train), y_train
    )
    return clf2d, scaler2d


def train_and_stream(test_size, model_name, params, epochs, pause_s):
    """Train the selected model and stream (figure, metrics-markdown) pairs.

    This is a generator: the realtime SGD path yields once per epoch; all
    other models yield exactly once after fitting.

    BUG FIX: the non-SGD branch previously ended with
    ``return fig, metrics_md``. Because this function contains ``yield``
    it is a generator, and a generator's return value is discarded by the
    caller (Gradio), so non-realtime models never updated the UI. That
    branch now yields its single result instead.
    """
    df, ycol = load_builtin_dataset()
    X = df.drop(columns=[ycol]).values
    y = df[ycol].values
    ensure_min_classes(y)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=42, stratify=y
    )
    scaler = StandardScaler().fit(X_train)
    X_train_s = scaler.transform(X_train)
    X_test_s = scaler.transform(X_test)

    # 2-D projection used purely for visualisation.
    pca = PCA(n_components=2, random_state=42).fit(X_train_s)
    coords_train = pca.transform(X_train_s)
    coords_test = pca.transform(X_test_s)

    clf = get_model(model_name, params)

    if model_name == "SGDClassifier (realtime)":
        classes = np.unique(y_train)
        for e in range(1, int(epochs) + 1):
            # One pass over the training data per epoch.
            clf.partial_fit(X_train_s, y_train, classes=classes)
            acc, f1, auc = _test_metrics(clf, X_test_s, y_test)

            clf2d, scaler2d = _fit_surrogate_2d(coords_train, y_train)

            title = f"Epoch {e}/{epochs} • Acc {acc:.2f} • F1 {f1:.2f}"
            fig_epoch = make_base_fig(coords_train, y_train, title=title)
            fig_epoch = draw_decision_boundary(fig_epoch, clf2d, scaler2d, pca, X_train_s)
            fig_epoch.add_trace(go.Scatter(
                x=coords_test[:, 0], y=coords_test[:, 1],
                mode="markers", name="Test set",
                marker=dict(size=10, symbol="circle-open", line=dict(width=2, color="#111")),
                hovertemplate="PC1: %{x:.2f}<br>PC2: %{y:.2f}<extra>Test set</extra>"
            ))

            yield fig_epoch, _metrics_markdown(acc, f1, auc)

            # Optional pause so the streaming animation is visible.
            if pause_s and float(pause_s) > 0:
                time.sleep(float(pause_s))
        return

    # Non-streaming models: a single fit and a single UI update.
    clf.fit(X_train_s, y_train)
    acc, f1, auc = _test_metrics(clf, X_test_s, y_test)

    fig = make_base_fig(coords_train, y_train, title=f"Model: {model_name}")
    clf2d, scaler2d = _fit_surrogate_2d(coords_train, y_train)
    fig = draw_decision_boundary(fig, clf2d, scaler2d, pca, X_train_s)
    fig.add_trace(go.Scatter(
        x=coords_test[:, 0], y=coords_test[:, 1],
        mode="markers", name="Test set",
        marker=dict(size=10, symbol="circle-open", line=dict(width=2, color="#111")),
    ))

    # Was ``return fig, metrics_md`` — see the docstring bug-fix note.
    yield fig, _metrics_markdown(acc, f1, auc)
|
|
|
|
|
|
|
|
|
|
|
# Markdown banner rendered at the top of the Gradio page (Dutch UI text —
# part of the runtime output, so it is kept verbatim).
DESCRIPTION = """


# 🧠 Supervised Leren – Depressie (synthetisch, ingebouwd)


- **Realtime** training (SGD) met **PCA-scatter** (elk bolletje = patiënt) en **beslissingslijnen**.


- Eén pagina, helder wit canvas. Geen uploads nodig.


"""
|
|
|
|
|
# Page layout and event wiring. The predict handler was previously a
# five-level nested-lambda pipeline; it is now a named function with the
# same output (and the feature scaler is fitted once instead of twice on
# identical data).
with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple", neutral_hue="slate")) as demo:
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        with gr.Column(scale=1):
            ds_preview = gr.Dataframe(label="Voorbeeld van de data (eerste 10 rijen)")
            btn_preview = gr.Button("📄 Dataset preview vernieuwen", variant="secondary")
        with gr.Column(scale=1):
            model_choice = gr.Radio(
                label="Model",
                choices=["SGDClassifier (realtime)", "Logistic Regression", "Random Forest", "SVM (RBF)"],
                value="SGDClassifier (realtime)"
            )
            with gr.Accordion("Hyperparameters", open=False):
                sgd_loss = gr.Dropdown(["log_loss", "hinge", "modified_huber"], value="log_loss", label="SGD loss")
                sgd_alpha = gr.Slider(1e-6, 1e-2, value=1e-4, step=1e-6, label="SGD alpha (L2)")
                sgd_lr = gr.Dropdown(["optimal", "invscaling", "constant", "adaptive"], value="optimal", label="SGD learning rate")

                rf_n = gr.Slider(50, 500, value=250, step=10, label="RandomForest n_estimators")
                rf_depth = gr.Slider(0, 20, value=8, step=1, label="RandomForest max_depth (0 = None)")
                svm_c = gr.Slider(0.1, 5.0, value=1.0, step=0.1, label="SVM C")

            test_size = gr.Slider(0.1, 0.5, value=0.25, step=0.05, label="Testset proportie")
            with gr.Row():
                epochs = gr.Slider(1, 30, value=12, step=1, label="Epochs (alleen realtime SGD)")
                pause_s = gr.Slider(0.0, 1.0, value=0.15, step=0.05, label="Pauze per epoch (s)")

            btn_train = gr.Button("🚀 Train & Visualiseer", variant="primary")

    with gr.Row():
        # Typo fix: the label previously ended with an unbalanced ')'.
        fig_out = gr.Plot(label="Visualisatie (PCA 2D) met beslissingslijnen")
        metrics_out = gr.Markdown(label="Metrieken")

    with gr.Row():
        with gr.Column():
            row_index = gr.Slider(0, 999, value=0, step=1, label="Kies een patiënt (rij-index) voor voorspelling")
            btn_predict = gr.Button("🔮 Voorspel voor gekozen patiënt", variant="secondary")
            pred_md = gr.Markdown(label="Voorspelling")

    def _dataset_head():
        """First 10 rows of the built-in dataset, for the preview table."""
        return load_builtin_dataset()[0].head(10)

    def _collect_params(sgd_loss_v, sgd_alpha_v, sgd_lr_v, rf_n_v, rf_depth_v, svm_c_v):
        """Normalize raw widget values into the params dict get_model expects."""
        return dict(
            sgd_loss=sgd_loss_v,
            sgd_alpha=float(sgd_alpha_v),
            sgd_lr=sgd_lr_v,
            rf_n=int(rf_n_v),
            # Slider value 0 is the UI convention for "no depth limit".
            rf_depth=None if int(rf_depth_v) == 0 else int(rf_depth_v),
            svm_c=float(svm_c_v),
        )

    def _proxy_train(test_size_v, model_name_v,
                     sgd_loss_v, sgd_alpha_v, sgd_lr_v, rf_n_v, rf_depth_v, svm_c_v,
                     epochs_v, pause_v):
        """Adapt widget values to train_and_stream and forward its stream."""
        params = _collect_params(sgd_loss_v, sgd_alpha_v, sgd_lr_v, rf_n_v, rf_depth_v, svm_c_v)
        yield from train_and_stream(test_size_v, model_name_v, params, epochs_v, pause_v)

    def _predict_patient(model_name_v, sgd_loss_v, sgd_alpha_v, sgd_lr_v,
                         rf_n_v, rf_depth_v, svm_c_v, row_idx):
        """Fit the chosen model on the full dataset and predict one row.

        Returns a Markdown report with the patient's raw record, the
        predicted class, and (when available) the maximum class
        probability. The realtime SGD choice is swapped for a plain
        logistic regression, since the SGD model is meant to be trained
        incrementally, not in one shot.
        """
        df, ycol = load_builtin_dataset()
        features = df.drop(columns=[ycol]).values
        scaler = StandardScaler().fit(features)
        Xs = scaler.transform(features)
        y = df[ycol]
        idx = int(row_idx)

        params = _collect_params(sgd_loss_v, sgd_alpha_v, sgd_lr_v, rf_n_v, rf_depth_v, svm_c_v)
        clf = get_model(model_name_v, params)
        if isinstance(clf, SGDClassifier):
            clf = LogisticRegression(max_iter=300)
        clf.fit(Xs, y.values)

        x_row = Xs[idx].reshape(1, -1)
        pred = clf.predict(x_row)[0]
        proba = clf.predict_proba(x_row)[0].max() if hasattr(clf, "predict_proba") else None
        pretty = json.dumps(df.iloc[[idx]].to_dict(orient="records")[0], ensure_ascii=False, indent=2)

        md = (
            f"### Gekozen patiënt (rij {idx})\n```json\n{pretty}\n```\n"
            f"**Voorspelling:** {pred} \n"
        )
        if proba is not None:
            md += f"**Zekerheid (max. klasse-prob):** {proba:.3f}"
        return md

    train_inputs = [test_size, model_choice, sgd_loss, sgd_alpha, sgd_lr,
                    rf_n, rf_depth, svm_c, epochs, pause_s]

    # Populate the preview and run an initial training pass on page load.
    demo.load(_dataset_head, inputs=None, outputs=[ds_preview])
    demo.load(_proxy_train, inputs=train_inputs, outputs=[fig_out, metrics_out])

    btn_preview.click(_dataset_head, inputs=None, outputs=[ds_preview])
    btn_train.click(_proxy_train, inputs=train_inputs, outputs=[fig_out, metrics_out])
    btn_predict.click(
        _predict_patient,
        inputs=[model_choice, sgd_loss, sgd_alpha, sgd_lr, rf_n, rf_depth, svm_c, row_index],
        outputs=[pred_md]
    )
|
|
|
|
|
# Script entry point: start the Gradio server when run directly.
if __name__ == "__main__":


    demo.launch()
|
|
|