eaglelandsonce committed on
Commit
c042a55
·
verified ·
1 Parent(s): 29a4eb4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +357 -0
app.py ADDED
@@ -0,0 +1,357 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import os
3
+ import tempfile
4
+ import uuid
5
+
6
+ import gradio as gr
7
+ import matplotlib.pyplot as plt
8
+ import numpy as np
9
+ import pandas as pd
10
+ import torch
11
+ from torch import nn
12
+ from torch.utils.data import DataLoader, TensorDataset
13
+
14
+
15
+ def _pick_device(device_choice: str) -> torch.device:
16
+ if device_choice == "cuda":
17
+ return torch.device("cuda" if torch.cuda.is_available() else "cpu")
18
+ if device_choice == "cpu":
19
+ return torch.device("cpu")
20
+ # auto
21
+ return torch.device("cuda" if torch.cuda.is_available() else "cpu")
22
+
23
+
24
def make_synthetic_regression(n_samples: int, noise_std: float, seed: int, n_features: int = 10):
    """Generate a seeded linear-regression dataset with an 80/20 train/val split.

    The data follows ``y = X @ w_true + b_true + noise`` where ``X`` has shape
    ``(n_samples, n_features)``, ``w_true`` has shape ``(n_features, 1)``,
    ``b_true`` has shape ``(1,)`` and ``noise`` is Gaussian scaled by
    ``noise_std``. All random draws come from one private generator seeded
    with ``seed``, so the same arguments always produce the same dataset and
    the global torch RNG is left untouched.

    Args:
        n_samples: Total number of rows to generate (train + val).
        noise_std: Standard deviation of the additive noise on ``y``.
        seed: Seed for the private random generator.
        n_features: Number of input columns; defaults to 10.

    Returns:
        A tuple ``(X_train, y_train, X_val, y_val, w_true, b_true, df,
        df_train_preview)``. ``df`` holds every row with columns
        ``x0..x{n_features-1}``, ``y`` and ``split`` ("train"/"val");
        ``df_train_preview`` is the first 20 training rows of ``df`` with a
        fresh index.

    Raises:
        ValueError: If ``n_features`` is smaller than 1.
    """
    n_features = int(n_features)
    if n_features < 1:
        raise ValueError(f"n_features must be >= 1, got {n_features}")

    g = torch.Generator().manual_seed(int(seed))

    # The order of these draws defines the dataset for a given seed; keep it stable.
    X = torch.randn(n_samples, n_features, generator=g)
    w_true = torch.randn(n_features, 1, generator=g)
    b_true = torch.randn(1, generator=g)
    noise = noise_std * torch.randn(n_samples, 1, generator=g)
    y = X @ w_true + b_true + noise

    # 80/20 split over a shuffled index permutation.
    idx = torch.randperm(n_samples, generator=g)
    n_train = int(round(0.8 * n_samples))
    train_idx, val_idx = idx[:n_train], idx[n_train:]

    X_train, y_train = X[train_idx], y[train_idx]
    X_val, y_val = X[val_idx], y[val_idx]

    # Full dataframe (original row order) for the CSV download.
    cols = [f"x{i}" for i in range(n_features)]
    df = pd.DataFrame(X.numpy(), columns=cols)
    df["y"] = y.numpy().reshape(-1)
    split = np.full(n_samples, "val", dtype=object)
    split[train_idx.numpy()] = "train"
    df["split"] = split

    # Data preview: first 20 TRAIN rows.
    df_train_preview = df[df["split"] == "train"].head(20).reset_index(drop=True)

    return (X_train, y_train, X_val, y_val, w_true, b_true, df, df_train_preview)
60
+
61
+
62
def train_raw_pytorch_loop(
    X_train: torch.Tensor,
    y_train: torch.Tensor,
    X_val: torch.Tensor,
    y_val: torch.Tensor,
    lr: float,
    batch_size: int,
    epochs: int,
    seed: int,
    device: torch.device,
):
    """Fit a single linear layer with a hand-written SGD training loop.

    The layer's input width is taken from ``X_train.shape[1]`` so the loop
    works for any number of features; the output width is 1.

    Args:
        X_train: Training inputs, 2-D with one row per sample.
        y_train: Training targets passed to ``nn.MSELoss`` against the
            model's ``(batch, 1)`` output.
        X_val: Validation inputs with the same column count as ``X_train``.
        y_val: Validation targets.
        lr: SGD learning rate.
        batch_size: Mini-batch size for both loaders.
        epochs: Number of passes over the training data.
        seed: Base seed; ``seed + 12345`` is installed as the global torch seed.
        device: Device the model and each batch are moved to.

    Returns:
        ``(model, train_losses, val_losses)`` where each loss list has one
        sample-weighted mean MSE per epoch.
    """
    # Ensure deterministic-ish behavior for model init (sets the global torch seed).
    torch.manual_seed(int(seed) + 12345)

    in_features = X_train.shape[1]
    model = nn.Linear(in_features, 1).to(device)
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    train_loader = DataLoader(
        TensorDataset(X_train, y_train),
        batch_size=batch_size,
        shuffle=True,
        drop_last=False,
    )
    val_loader = DataLoader(
        TensorDataset(X_val, y_val),
        batch_size=batch_size,
        shuffle=False,
        drop_last=False,
    )

    train_losses = []
    val_losses = []

    for _epoch in range(epochs):
        # ---- TRAIN ----
        model.train()
        running = 0.0
        n_seen = 0

        for xb, yb in train_loader:
            xb = xb.to(device)
            yb = yb.to(device)

            # Manual training loop steps:
            optimizer.zero_grad()       # 1) zero_grad
            y_pred = model(xb)          # 2) forward
            loss = loss_fn(y_pred, yb)  # 3) loss
            loss.backward()             # 4) backward
            optimizer.step()            # 5) step

            # Weight by batch size so a short final batch does not skew the mean.
            bs = xb.shape[0]
            running += loss.item() * bs
            n_seen += bs

        # max(1, ...) avoids dividing by zero when the loader is empty.
        train_losses.append(running / max(1, n_seen))

        # ---- VAL ----
        model.eval()
        running = 0.0
        n_seen = 0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb = xb.to(device)
                yb = yb.to(device)
                loss = loss_fn(model(xb), yb)
                bs = xb.shape[0]
                running += loss.item() * bs
                n_seen += bs

        val_losses.append(running / max(1, n_seen))

    return model, train_losses, val_losses
136
+
137
+
138
def build_weight_comparison(w_true: torch.Tensor, b_true: torch.Tensor, model: nn.Linear):
    """Tabulate true vs. learned parameters of a single-output linear model.

    Args:
        w_true: Ground-truth weights; flattened before comparison.
        b_true: Ground-truth bias; its first element is used.
        model: Trained linear layer whose ``weight`` and ``bias`` are compared.

    Returns:
        A ``pandas.DataFrame`` with columns ``param``, ``true``, ``learned``
        and ``abs_error``: one row per weight, followed by one row for the bias.

    Raises:
        ValueError: If the flattened true and learned weight vectors differ
            in length.
    """
    w_learned = model.weight.detach().cpu().numpy().reshape(-1)
    b_learned = float(model.bias.detach().cpu().numpy().reshape(-1)[0])

    w_true_np = w_true.detach().cpu().numpy().reshape(-1)
    b_true_np = float(b_true.detach().cpu().numpy().reshape(-1)[0])

    if w_true_np.shape[0] != w_learned.shape[0]:
        raise ValueError(
            f"weight count mismatch: {w_true_np.shape[0]} true vs {w_learned.shape[0]} learned"
        )

    # Row count follows the actual weight vector instead of a fixed width.
    rows = [
        {
            "param": f"w[{i}] (x{i})",
            "true": float(true_w),
            "learned": float(learned_w),
            "abs_error": float(abs(true_w - learned_w)),
        }
        for i, (true_w, learned_w) in enumerate(zip(w_true_np, w_learned))
    ]
    rows.append(
        {
            "param": "bias (b)",
            "true": b_true_np,
            "learned": b_learned,
            "abs_error": float(abs(b_true_np - b_learned)),
        }
    )
    return pd.DataFrame(rows)
164
+
165
+
166
def make_loss_plot(train_losses, val_losses):
    """Build the train/validation loss curve as a standalone Matplotlib figure.

    Args:
        train_losses: Per-epoch training losses.
        val_losses: Per-epoch validation losses, same length as ``train_losses``.

    Returns:
        A ``matplotlib.figure.Figure`` with both curves plotted against the
        1-based epoch number.
    """
    # Build the figure without pyplot: pyplot keeps a global reference to every
    # figure it creates until it is explicitly closed, which leaks one figure
    # per call in a long-running server process.
    from matplotlib.figure import Figure

    fig = Figure()
    ax = fig.subplots()

    epoch_axis = np.arange(1, len(train_losses) + 1)
    ax.plot(epoch_axis, train_losses, label="train")
    ax.plot(epoch_axis, val_losses, label="val")
    ax.set_title("Raw PyTorch Training Loop (Linear Regression)")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("MSE Loss")
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    return fig
178
+
179
+
180
def run_experiment(n_samples, noise_std, lr, batch_size, epochs, seed, device_choice):
    """Run one full experiment: generate data, train, and package the results.

    Args:
        n_samples: Total number of synthetic rows (coerced to ``int``).
        noise_std: Standard deviation of the target noise (coerced to ``float``).
        lr: SGD learning rate (coerced to ``float``).
        batch_size: Mini-batch size (coerced to ``int``).
        epochs: Number of training epochs (coerced to ``int``); must be >= 1.
        seed: Seed for data generation and model init (coerced to ``int``).
        device_choice: Device selector string handed to ``_pick_device``.

    Returns:
        ``(fig, w_table, summary, df_train_preview, out_path)``: the loss
        figure, the true-vs-learned parameter table, a text summary, the
        training-row preview frame, and the path of the CSV written for
        download.

    Raises:
        ValueError: If ``epochs`` is smaller than 1.
    """
    # UI widgets may deliver numbers as floats; coerce to the types used below.
    n_samples = int(n_samples)
    batch_size = int(batch_size)
    epochs = int(epochs)
    seed = int(seed)
    noise_std = float(noise_std)
    lr = float(lr)

    # Fail before training: with zero epochs there is no final loss to report.
    if epochs < 1:
        raise ValueError(f"epochs must be >= 1, got {epochs}")

    device = _pick_device(device_choice)

    X_train, y_train, X_val, y_val, w_true, b_true, df_full, df_train_preview = make_synthetic_regression(
        n_samples=n_samples,
        noise_std=noise_std,
        seed=seed,
    )

    model, train_losses, val_losses = train_raw_pytorch_loop(
        X_train=X_train,
        y_train=y_train,
        X_val=X_val,
        y_val=y_val,
        lr=lr,
        batch_size=batch_size,
        epochs=epochs,
        seed=seed,
        device=device,
    )

    fig = make_loss_plot(train_losses, val_losses)
    w_table = build_weight_comparison(w_true, b_true, model)

    # Save dataset CSV for download; a random name keeps runs from overwriting each other.
    out_path = os.path.join(
        tempfile.gettempdir(),
        f"synthetic_regression_{uuid.uuid4().hex}.csv",
    )
    df_full.to_csv(out_path, index=False)

    # Report the sizes of the tensors actually used rather than re-deriving the split.
    n_train = int(X_train.shape[0])
    n_val = int(X_val.shape[0])

    summary = (
        "Raw PyTorch loop steps used each batch:\n"
        " optimizer.zero_grad() -> model(x) -> loss_fn(...) -> loss.backward() -> optimizer.step()\n\n"
        f"Device used: {device.type}\n"
        f"Samples: {n_samples} (train={n_train}, val={n_val})\n"
        f"Noise std: {noise_std}\n"
        f"LR: {lr}, Batch size: {batch_size}, Epochs: {epochs}, Seed: {seed}\n\n"
        f"Final train loss: {train_losses[-1]:.6f}\n"
        f"Final val loss: {val_losses[-1]:.6f}\n"
    )

    return fig, w_table, summary, df_train_preview, out_path
231
+
232
+
233
def build_ui():
    """Assemble the Gradio Blocks app and return it without launching.

    The app has two tabs: "Train & Results" (hyper-parameter controls, loss
    plot, parameter table, summary and CSV download) and "Data Preview" (the
    first training rows). One click handler on the run button calls
    ``run_experiment`` and sends its five return values to the five output
    components, including the preview table on the second tab.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    available_devices = ["auto", "cpu"]
    if torch.cuda.is_available():
        available_devices.append("cuda")

    # Column headers shown in the preview table before the first run.
    preview_columns = [f"x{i}" for i in range(10)] + ["y", "split"]

    with gr.Blocks(title="Raw PyTorch Training Loop (Gradio)") as demo:
        gr.Markdown(
            """
            # Raw PyTorch Training Loop (Linear Regression)
            This Space generates a fresh synthetic regression dataset each run and trains a `nn.Linear(10, 1)` model using a **manual** PyTorch training loop.
            """
        )

        with gr.Tabs():
            with gr.Tab("Train & Results"):
                with gr.Row():
                    with gr.Column(scale=1):
                        n_samples = gr.Slider(
                            minimum=200,
                            maximum=20000,
                            value=2000,
                            step=100,
                            label="n_samples",
                        )
                        noise_std = gr.Slider(
                            minimum=0.0,
                            maximum=5.0,
                            value=1.0,
                            step=0.05,
                            label="noise_std",
                        )
                        lr = gr.Number(value=0.01, label="lr (SGD learning rate)", precision=6)
                        batch_size = gr.Slider(
                            minimum=8,
                            maximum=1024,
                            value=64,
                            step=8,
                            label="batch_size",
                        )
                        epochs = gr.Slider(
                            minimum=1,
                            maximum=200,
                            value=20,
                            step=1,
                            label="epochs",
                        )
                        seed = gr.Number(value=42, label="seed", precision=0)
                        device_choice = gr.Dropdown(
                            choices=available_devices,
                            value="auto",
                            label="device (cpu/cuda if available)",
                        )
                        run_btn = gr.Button("Run training")

                    with gr.Column(scale=2):
                        loss_plot = gr.Plot(label="Loss curve (train vs val)")
                        w_compare = gr.Dataframe(
                            label="w_true vs w_learned (and bias)",
                            interactive=False,
                            wrap=True,
                        )
                        summary = gr.Textbox(
                            label="Summary",
                            lines=10,
                            interactive=False,
                        )
                        dataset_file = gr.File(
                            label="Download full dataset CSV (train+val): columns x0..x9, y, split",
                            interactive=False,
                        )

            with gr.Tab("Data Preview"):
                gr.Markdown("### First 20 rows from the **training split**")
                preview_df = gr.Dataframe(
                    headers=preview_columns,
                    label="Training rows (first 20)",
                    interactive=False,
                    wrap=True,
                )

        # Registered only after both tabs exist so the preview table can be a
        # direct output: the fourth value returned by run_experiment (the
        # training-row preview) lands in `preview_df` in the same event as the
        # other results, with no intermediate state to keep in sync.
        run_btn.click(
            fn=run_experiment,
            inputs=[n_samples, noise_std, lr, batch_size, epochs, seed, device_choice],
            outputs=[loss_plot, w_compare, summary, preview_df, dataset_file],
        )

        gr.Markdown(
            """
            **Notes**
            - Dataset is regenerated each run (based on `seed`).
            - Train/val split is 80/20 and uses `DataLoader`.
            - Model: `nn.Linear(10,1)`, Loss: `nn.MSELoss()`, Optimizer: `torch.optim.SGD(lr=...)`.
            """
        )

    return demo
352
+
353
+
354
if __name__ == "__main__":
    # Script entry point: build the interface, turn on request queuing,
    # then start the web server.
    demo = build_ui()
    demo.queue()
    demo.launch()