Spaces:

Marcel0123
/

supervised-learning

Sleeping

App Files Files Community

Marcel0123 committed on Aug 27, 2025

Commit

15de49c

verified ·

1 Parent(s): 5217123

Upload 3 files

Browse files

Files changed (2) hide show

README.md +5 -7
app.py +24 -37

README.md CHANGED Viewed

@@ -1,10 +1,8 @@
-# Live Supervised Learning (Linear Regression) — with Loss Curve
-Gradio-app die in real-time laat zien hoe een lineaire regressie leert op een 2D-dataset.
-Deze versie toont **twee live plots**: (1) data + regressielijn en (2) **loss curve (MSE per epoch)**.
-De app start automatisch met trainen bij het openen (geen uploads nodig).
-## Lokaal draaien
 ```bash
 pip install -r requirements.txt
 python app.py
@@ -12,5 +10,5 @@ python app.py
 ## Deploy naar Hugging Face Spaces
 1. Maak een nieuwe Space aan → **Gradio** template.
-2. Upload `app.py`, `requirements.txt` en `README.md` (of upload het zip-bestand en pak het uit).
-3. Start de Space. De app begint automatisch met trainen met de standaardwaarden.

+# Live Supervised Learning (Linear Regression)
+Een Gradio-app die in real-time laat zien hoe een lineaire regressie leert op een 2D-dataset.
+## Lokaal draaien
 ```bash
 pip install -r requirements.txt
 python app.py
 ## Deploy naar Hugging Face Spaces
 1. Maak een nieuwe Space aan → **Gradio** template.
+2. Upload `app.py`, `requirements.txt` en `README.md`.
+3. Wacht tot de Space is gebouwd en gestart.

app.py CHANGED Viewed

@@ -17,14 +17,14 @@ def load_dataset(name: str, n_samples: int = 200, noise: float = 10.0):
         return X, y, "Synthetische data (y = 4x - 2 + noise)"
     elif name == "Diabetes (BMI vs target)":
         d = datasets.load_diabetes()
-        X = d.data[:, 2]  # BMI feature
         y = d.target
         return X, y, "Diabetes: BMI vs. disease progression"
     elif name == "California Housing (MedInc vs value)":
         try:
             ch = datasets.fetch_california_housing()
-            X = ch.data[:, 0]  # MedInc
-            y = ch.target      # MedHouseValue
             return X, y, "California Housing: MedInc vs. house value"
         except Exception:
             X, y, _ = load_dataset("Synthetisch", n_samples=n_samples, noise=noise)
@@ -32,47 +32,40 @@ def load_dataset(name: str, n_samples: int = 200, noise: float = 10.0):
     else:
         raise ValueError("Onbekende dataset")
 # ------------------------------
-# Training (SGD) voor y = w*x + b met real-time visualisatie
 # ------------------------------
 def sgd_train_generator(dataset_name, lr, epochs, batch_size, n_samples, noise, seed):
     rng = np.random.RandomState(int(seed))
     x, y, label = load_dataset(dataset_name, n_samples=n_samples, noise=noise)
     n = x.shape[0]
     x = x.astype(np.float64)
     y = y.astype(np.float64)
-    w = 0.0
-    b = 0.0
     x_min, x_max = float(np.min(x)), float(np.max(x))
-    losses = []
     for epoch in range(1, int(epochs) + 1):
         x, y = shuffle(x, y, random_state=rng)
         for start in range(0, n, int(batch_size)):
             end = min(start + int(batch_size), n)
-            xb = x[start:end]
-            yb = y[start:end]
             yhat = w * xb + b
             err = yb - yhat
             dw = -(2.0 / xb.size) * np.sum(xb * err)
             db = -(2.0 / xb.size) * np.sum(err)
             w -= lr * dw
             b -= lr * db
-        # Volledige-set MSE
         y_pred = w * x + b
         mse = float(np.mean((y - y_pred) ** 2))
-        losses.append(mse)
-        # Plot 1: data + regressielijn
-        fig_main = plt.figure(figsize=(6, 4))
-        ax1 = fig_main.add_subplot(111)
         ax1.scatter(x, y, alpha=0.6, s=18)
         xs = np.linspace(x_min, x_max, 200)
         ax1.plot(xs, w * xs + b, linewidth=2)
@@ -82,24 +75,25 @@ def sgd_train_generator(dataset_name, lr, epochs, batch_size, n_samples, noise,
         ax1.grid(True, linestyle=":", linewidth=0.6)
         plt.tight_layout()
-        # Plot 2: loss-curve
-        fig_loss = plt.figure(figsize=(6, 3))
-        ax2 = fig_loss.add_subplot(111)
-        ax2.plot(range(1, len(losses)+1), losses, marker="o", linewidth=1.5)
-        ax2.set_title("Loss (MSE) per epoch")
         ax2.set_xlabel("Epoch")
         ax2.set_ylabel("MSE")
         ax2.grid(True, linestyle=":", linewidth=0.6)
         plt.tight_layout()
-        yield fig_main, fig_loss, f"w = {w:.4f}, b = {b:.4f}, MSE = {mse:.4f}"
 # ------------------------------
 # Uitlegtekst
 # ------------------------------
 THEORY_MD = r"""
 ### Wat is supervised learning?
-Bij **supervised learning** leer je een model aan de hand van voorbeeldparen (input -> gewenste output). Het doel is een functie te vinden die de relatie tussen input en output goed benadert.
 ### Lineaire regressie in 1D
 We passen een lijn \( y = w x + b \) aan op data. We minimaliseren de **Mean Squared Error (MSE)**:
@@ -107,6 +101,7 @@ We passen een lijn \( y = w x + b \) aan op data. We minimaliseren de **Mean Squ
 We gebruiken **stochastic gradient descent (SGD)** om \(w\) en \(b\) stapje voor stapje te verbeteren.
 """
 # ------------------------------
 # Gradio UI
 # ------------------------------
@@ -131,22 +126,14 @@ with gr.Blocks(title="Live Supervised Learning: Linear Regression") as demo:
                     seed = gr.Slider(0, 9999, value=42, step=1, label="Random seed")
                     train_btn = gr.Button("Train live")
                 with gr.Column(scale=2):
-                    plot_main = gr.Plot(label="Data & regressielijn (live)")
-                    plot_loss = gr.Plot(label="Loss-curve (MSE per epoch)")
                     metrics = gr.Markdown()
-            # Knoop de generator aan de UI
             train_btn.click(
                 fn=sgd_train_generator,
                 inputs=[dataset, lr, epochs, batch, n_samples, noise, seed],
-                outputs=[plot_main, plot_loss, metrics]
-            )
-            # Auto-train bij het openen
-            demo.load(
-                fn=sgd_train_generator,
-                inputs=[dataset, lr, epochs, batch, n_samples, noise, seed],
-                outputs=[plot_main, plot_loss, metrics]
             )
 if __name__ == "__main__":

         return X, y, "Synthetische data (y = 4x - 2 + noise)"
     elif name == "Diabetes (BMI vs target)":
         d = datasets.load_diabetes()
+        X = d.data[:, 2]
         y = d.target
         return X, y, "Diabetes: BMI vs. disease progression"
     elif name == "California Housing (MedInc vs value)":
         try:
             ch = datasets.fetch_california_housing()
+            X = ch.data[:, 0]
+            y = ch.target
             return X, y, "California Housing: MedInc vs. house value"
         except Exception:
             X, y, _ = load_dataset("Synthetisch", n_samples=n_samples, noise=noise)
     else:
         raise ValueError("Onbekende dataset")
 # ------------------------------
+# Training (SGD) met live plots
 # ------------------------------
 def sgd_train_generator(dataset_name, lr, epochs, batch_size, n_samples, noise, seed):
     # Generator: fits the line y = w*x + b with mini-batch gradient descent and,
     # after every epoch, yields (data figure, loss figure, metrics string).
     #
     # dataset_name, n_samples, noise are forwarded to load_dataset();
     # lr is the step size; epochs, batch_size and seed are cast to int before use.
     # NOTE(review): this view is a diff, so some lines of the function may be
     # elided here — confirm against the full file before relying on these notes.
     rng = np.random.RandomState(int(seed))
     # `label` is not used in the lines visible here (possibly used in elided code).
     x, y, label = load_dataset(dataset_name, n_samples=n_samples, noise=noise)
     n = x.shape[0]
     x = x.astype(np.float64)
     y = y.astype(np.float64)
+    w, b = 0.0, 0.0
     # Fixed x-range for drawing the regression line, computed once before training.
     x_min, x_max = float(np.min(x)), float(np.max(x))
+    loss_history = []
     for epoch in range(1, int(epochs) + 1):
         # Reshuffle x and y together each epoch; rng carries state across epochs.
         # (presumably sklearn.utils.shuffle — the import is outside this view)
         x, y = shuffle(x, y, random_state=rng)
         for start in range(0, n, int(batch_size)):
             # The last batch may be shorter than batch_size.
             end = min(start + int(batch_size), n)
+            xb, yb = x[start:end], y[start:end]
             yhat = w * xb + b
             err = yb - yhat
             # Gradients of the batch MSE mean((yb - (w*xb + b))**2) w.r.t. w and b.
             dw = -(2.0 / xb.size) * np.sum(xb * err)
             db = -(2.0 / xb.size) * np.sum(err)
             w -= lr * dw
             b -= lr * db
         # Full-dataset MSE after this epoch's updates.
         y_pred = w * x + b
         mse = float(np.mean((y - y_pred) ** 2))
+        loss_history.append(mse)
+        # Plot scatter + regression line
+        fig1 = plt.figure(figsize=(6, 4))
+        ax1 = fig1.add_subplot(111)
         ax1.scatter(x, y, alpha=0.6, s=18)
         xs = np.linspace(x_min, x_max, 200)
         ax1.plot(xs, w * xs + b, linewidth=2)
         ax1.grid(True, linestyle=":", linewidth=0.6)
         plt.tight_layout()
+        # Plot loss curve
         # NOTE(review): two new figures are created every epoch and no close call
         # is visible here — confirm they are released (e.g. plt.close) to avoid
         # accumulating open figures over long runs.
+        fig2 = plt.figure(figsize=(6, 4))
+        ax2 = fig2.add_subplot(111)
         # loss_history has exactly `epoch` entries at this point, so x and y align.
+        ax2.plot(range(1, epoch + 1), loss_history, marker="o")
+        ax2.set_title("Loss curve (MSE per epoch)")
         ax2.set_xlabel("Epoch")
         ax2.set_ylabel("MSE")
         ax2.grid(True, linestyle=":", linewidth=0.6)
         plt.tight_layout()
         # One yield per epoch: (data figure, loss figure, formatted w/b/MSE text).
+        yield fig1, fig2, f"w = {w:.4f}, b = {b:.4f}, MSE = {mse:.4f}"
 # ------------------------------
 # Uitlegtekst
 # ------------------------------
 THEORY_MD = r"""
 ### Wat is supervised learning?
+Bij **supervised learning** leer je een model aan de hand van voorbeeldparen *(input → gewenste output)*. Het doel is een functie te vinden die de relatie tussen input en output goed benadert.
 ### Lineaire regressie in 1D
 We passen een lijn \( y = w x + b \) aan op data. We minimaliseren de **Mean Squared Error (MSE)**:
 We gebruiken **stochastic gradient descent (SGD)** om \(w\) en \(b\) stapje voor stapje te verbeteren.
 """
 # ------------------------------
 # Gradio UI
 # ------------------------------
                     seed = gr.Slider(0, 9999, value=42, step=1, label="Random seed")
                     train_btn = gr.Button("Train live")
                 with gr.Column(scale=2):
+                    plot_data = gr.Plot(label="Data & regressielijn (live)")
+                    plot_loss = gr.Plot(label="Loss curve (MSE)")
                     metrics = gr.Markdown()
             train_btn.click(
                 fn=sgd_train_generator,
                 inputs=[dataset, lr, epochs, batch, n_samples, noise, seed],
+                outputs=[plot_data, plot_loss, metrics]
             )
 if __name__ == "__main__":