Marcel0123 committed on
Commit
5217123
·
verified ·
1 Parent(s): 300afd0

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +15 -12
  2. app.py +153 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -1,13 +1,16 @@
1
- ---
2
- title: Supervised Learning
3
- emoji: 📚
4
- colorFrom: red
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 5.44.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Live Supervised Learning (Linear Regression) — with Loss Curve
 
 
 
 
 
 
 
 
 
 
2
 
3
+ Gradio-app die in real-time laat zien hoe een lineaire regressie leert op een 2D-dataset.
4
+ Deze versie toont **twee live plots**: (1) data + regressielijn en (2) **loss curve (MSE per epoch)**.
5
+ De app start automatisch met trainen bij het openen (geen uploads nodig).
6
+
7
+ ## Lokaal draaien
8
+ ```bash
9
+ pip install -r requirements.txt
10
+ python app.py
11
+ ```
12
+
13
+ ## Deploy naar Hugging Face Spaces
14
+ 1. Maak een nieuwe Space aan → **Gradio** template.
15
+ 2. Upload `app.py`, `requirements.txt` en `README.md` (of upload het zip-bestand en pak het uit).
16
+ 3. Start de Space. De app begint automatisch met trainen met de standaardwaarden.
app.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ from sklearn import datasets
5
+ from sklearn.utils import shuffle
6
+
7
+ # ------------------------------
8
+ # Data helpers
9
+ # ------------------------------
10
def load_dataset(name: str, n_samples: int = 200, noise: float = 10.0):
    """Return ``(x, y, label)`` for the dataset selected by *name*.

    Args:
        name: One of ``"Synthetisch"``, ``"Diabetes (BMI vs target)"`` or
            ``"California Housing (MedInc vs value)"``.
        n_samples: Number of points to generate. Only used for the synthetic
            dataset (and for the synthetic fallback). Integral floats such as
            ``300.0`` are accepted and converted with ``int()``.
        noise: Standard deviation of the Gaussian noise added to the
            synthetic targets. Only used for the synthetic dataset.

    Returns:
        A tuple ``(x, y, label)``: ``x`` and ``y`` are 1D numpy arrays for
        easy plotting, ``label`` is a human-readable plot title.

    Raises:
        ValueError: If *name* is not one of the known dataset names.
    """
    if name == "Synthetisch":
        # np.linspace and rng.normal(size=...) reject floats, so normalise
        # the count once up front.
        n_points = int(n_samples)
        # Fixed seed: the synthetic points are identical on every call.
        rng = np.random.RandomState(42)
        X = np.linspace(-3, 3, n_points)
        true_w, true_b = 4.0, -2.0
        y = true_w * X + true_b + rng.normal(0, noise, size=n_points)
        return X, y, "Synthetische data (y = 4x - 2 + noise)"

    if name == "Diabetes (BMI vs target)":
        d = datasets.load_diabetes()
        X = d.data[:, 2]  # BMI feature
        y = d.target
        return X, y, "Diabetes: BMI vs. disease progression"

    if name == "California Housing (MedInc vs value)":
        try:
            ch = datasets.fetch_california_housing()
            X = ch.data[:, 0]  # MedInc
            y = ch.target  # MedHouseValue
            return X, y, "California Housing: MedInc vs. house value"
        except Exception:
            # Deliberate best-effort: if the dataset cannot be fetched or
            # loaded for any reason, fall back to synthetic data instead of
            # failing the caller.
            X, y, _ = load_dataset("Synthetisch", n_samples=n_samples, noise=noise)
            return X, y, "(Fallback) Synthetische data"

    raise ValueError("Onbekende dataset")
34
+
35
# ------------------------------
# Training (SGD) for y = w*x + b with real-time visualisation
# ------------------------------
def _draw_fit_figure(x, y, w, b, x_min, x_max, title):
    """Build the scatter plot of the data with the current regression line."""
    fig = plt.figure(figsize=(6, 4))
    ax = fig.add_subplot(111)
    ax.scatter(x, y, alpha=0.6, s=18)
    xs = np.linspace(x_min, x_max, 200)
    ax.plot(xs, w * xs + b, linewidth=2)
    ax.set_title(title)
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.grid(True, linestyle=":", linewidth=0.6)
    # Lay out this figure explicitly rather than pyplot's "current" figure.
    fig.tight_layout()
    return fig


def _draw_loss_figure(losses):
    """Build the line plot of the per-epoch MSE values collected so far."""
    fig = plt.figure(figsize=(6, 3))
    ax = fig.add_subplot(111)
    ax.plot(range(1, len(losses) + 1), losses, marker="o", linewidth=1.5)
    ax.set_title("Loss (MSE) per epoch")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("MSE")
    ax.grid(True, linestyle=":", linewidth=0.6)
    fig.tight_layout()
    return fig


def sgd_train_generator(dataset_name, lr, epochs, batch_size, n_samples, noise, seed):
    """Fit ``y = w*x + b`` with mini-batch SGD, yielding plots after each epoch.

    Args:
        dataset_name: Dataset name understood by ``load_dataset``.
        lr: Learning rate applied to both gradients.
        epochs: Number of passes over the data (converted with ``int()``).
        batch_size: Mini-batch size (converted with ``int()``, at least 1).
        n_samples: Forwarded to ``load_dataset``.
        noise: Forwarded to ``load_dataset``.
        seed: Seed for the per-epoch shuffling (converted with ``int()``).

    Yields:
        ``(fig_main, fig_loss, text)`` once per epoch: the data/regression-line
        figure, the loss-curve figure, and a summary string with the current
        ``w``, ``b`` and full-dataset MSE.

    Note:
        Both figures are closed as soon as the consumer resumes (or closes)
        the generator, so a yielded figure must be rendered before the next
        item is requested. Without this, pyplot keeps every figure alive and
        memory grows with each epoch.
    """
    rng = np.random.RandomState(int(seed))
    x, y, label = load_dataset(dataset_name, n_samples=n_samples, noise=noise)

    n = x.shape[0]
    x = x.astype(np.float64)
    y = y.astype(np.float64)

    w = 0.0
    b = 0.0

    x_min, x_max = float(np.min(x)), float(np.max(x))
    losses = []

    # Hoisted out of the loops; a step below 1 would make range() fail.
    step = max(1, int(batch_size))

    for epoch in range(1, int(epochs) + 1):
        x, y = shuffle(x, y, random_state=rng)

        for start in range(0, n, step):
            # Slicing clamps at the end of the array, so the last batch may
            # simply be shorter.
            xb = x[start:start + step]
            yb = y[start:start + step]

            # Gradients of the batch MSE with respect to w and b.
            err = yb - (w * xb + b)
            dw = -(2.0 / xb.size) * np.sum(xb * err)
            db = -(2.0 / xb.size) * np.sum(err)

            w -= lr * dw
            b -= lr * db

        # MSE over the full dataset after this epoch's updates.
        mse = float(np.mean((y - (w * x + b)) ** 2))
        losses.append(mse)

        fig_main = _draw_fit_figure(
            x, y, w, b, x_min, x_max,
            f"{label}\nEpoch {epoch}/{epochs} — MSE: {mse:.4f}",
        )
        fig_loss = _draw_loss_figure(losses)

        try:
            yield fig_main, fig_loss, f"w = {w:.4f}, b = {b:.4f}, MSE = {mse:.4f}"
        finally:
            # Runs on normal resumption and when the generator is closed
            # early (e.g. the run is cancelled), so figures never pile up.
            plt.close(fig_main)
            plt.close(fig_loss)
96
+
97
# ------------------------------
# Explanatory text
# ------------------------------
# Markdown shown to the user (in Dutch): what supervised learning is, and the
# 1D linear-regression model with its MSE objective. Raw string so the LaTeX
# backslashes are kept verbatim.
# NOTE(review): the formulas use \( \) and \[ \] delimiters — confirm the
# Markdown component that renders this text is configured to recognise them.
THEORY_MD = r"""
### Wat is supervised learning?
Bij **supervised learning** leer je een model aan de hand van voorbeeldparen (input -> gewenste output). Het doel is een functie te vinden die de relatie tussen input en output goed benadert.

### Lineaire regressie in 1D
We passen een lijn \( y = w x + b \) aan op data. We minimaliseren de **Mean Squared Error (MSE)**:
\[ \operatorname{MSE} = \frac{1}{N} \sum_{i=1}^N (y_i - (w x_i + b))^2 \]
We gebruiken **stochastic gradient descent (SGD)** om \(w\) en \(b\) stapje voor stapje te verbeteren.
"""
109
+
110
+ # ------------------------------
111
+ # Gradio UI
112
+ # ------------------------------
113
# Two-tab app: "Uitleg" shows the theory text, "Playground" exposes the
# training controls next to the two live plots.
with gr.Blocks(title="Live Supervised Learning: Linear Regression") as demo:
    gr.Markdown("# Live Supervised Learning — Lineaire Regressie")

    # gr.Markdown only renders $$...$$ by default; THEORY_MD uses \( \) and
    # \[ \], so register those delimiters explicitly or the formulas show up
    # as raw text.
    theory_latex_delimiters = [
        {"left": "$$", "right": "$$", "display": True},
        {"left": "\\[", "right": "\\]", "display": True},
        {"left": "\\(", "right": "\\)", "display": False},
    ]

    with gr.Tabs():
        with gr.TabItem("Uitleg"):
            gr.Markdown(THEORY_MD, latex_delimiters=theory_latex_delimiters)
        with gr.TabItem("Playground"):
            with gr.Row():
                with gr.Column(scale=1):
                    dataset = gr.Dropdown(
                        ["Synthetisch", "Diabetes (BMI vs target)", "California Housing (MedInc vs value)"],
                        value="Synthetisch",
                        label="Dataset",
                    )
                    lr = gr.Slider(1e-4, 1e-0, value=1e-2, step=1e-4, label="Learning Rate")
                    epochs = gr.Slider(1, 200, value=50, step=1, label="Epochs")
                    batch = gr.Slider(1, 512, value=64, step=1, label="Batchgrootte")
                    n_samples = gr.Slider(50, 2000, value=300, step=10, label="Aantal samples (synthetisch)")
                    noise = gr.Slider(0.0, 30.0, value=10.0, step=0.5, label="Noise (synthetisch)")
                    seed = gr.Slider(0, 9999, value=42, step=1, label="Random seed")
                    train_btn = gr.Button("Train live")
                with gr.Column(scale=2):
                    plot_main = gr.Plot(label="Data & regressielijn (live)")
                    plot_loss = gr.Plot(label="Loss-curve (MSE per epoch)")
                    metrics = gr.Markdown()

    # Same inputs/outputs for both the automatic and the manual run.
    train_inputs = [dataset, lr, epochs, batch, n_samples, noise, seed]
    train_outputs = [plot_main, plot_loss, metrics]

    # Auto-train on page load with the default control values.
    load_event = demo.load(
        fn=sgd_train_generator,
        inputs=train_inputs,
        outputs=train_outputs,
    )

    # Wire the generator to the button. Cancel a still-running auto-train
    # first so two generators never stream into the same plots at once.
    train_btn.click(
        fn=sgd_train_generator,
        inputs=train_inputs,
        outputs=train_outputs,
        cancels=[load_event],
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio>=4.36.0
2
+ matplotlib>=3.7.0
3
+ numpy>=1.23.0
4
+ scikit-learn>=1.2.0