eaglelandsonce committed on
Commit
c21e478
·
verified ·
1 Parent(s): 737b5c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -6
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import io
2
  import random
 
3
  from dataclasses import dataclass
4
 
5
  import gradio as gr
@@ -64,6 +65,29 @@ def fig_to_image(fig) -> np.ndarray:
64
  return image
65
 
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  def train_raw_pytorch(
68
  n_samples: int,
69
  noise_std: float,
@@ -79,6 +103,10 @@ def train_raw_pytorch(
79
  spec = DataSpec(n_samples=n_samples, n_features=10, noise_std=noise_std, train_frac=0.8)
80
  X_train, y_train, X_val, y_val, w_true, b_true = make_synthetic_regression(spec, seed=seed)
81
 
 
 
 
 
82
  # Data preview (first 20 rows from training split)
83
  preview_n = min(20, X_train.shape[0])
84
  df_preview = pd.DataFrame(
@@ -205,7 +233,8 @@ def train_raw_pytorch(
205
  f"Final train loss: {train_losses[-1]:.6f}\n"
206
  f"Final val loss: {val_losses[-1]:.6f}\n\n"
207
  f"True bias (b_true): {float(b_true.item()):.4f}\n"
208
- f"Learned bias (b_learned): {float(b_learned.item()):.4f}\n"
 
209
  )
210
 
211
  raw_loop_snippet = """# Raw PyTorch: requires manual training loop
@@ -224,7 +253,8 @@ for x, y in dataloader:
224
  optimizer.step()
225
  """
226
 
227
- return loss_plot, df_weights, summary, raw_loop_snippet, df_preview
 
228
 
229
 
230
  with gr.Blocks(title="Raw PyTorch Training Loop Demo") as demo:
@@ -238,7 +268,7 @@ This Space generates **synthetic data** each run:
238
  y = Xw + b + \\text{noise}
239
  \]
240
 
241
- Use **Data Preview** to see the first rows of the generated dataset.
242
  """
243
  )
244
 
@@ -261,10 +291,11 @@ Use **Data Preview** to see the first rows of the generated dataset.
261
  with gr.Tab("Outputs"):
262
  loss_img = gr.Image(label="Loss Curve", type="numpy")
263
  weights_df = gr.Dataframe(label="Weights: True vs Learned (sorted by abs error)", wrap=True)
264
- summary_txt = gr.Textbox(label="Summary", lines=8)
265
 
266
  with gr.Tab("Data Preview"):
267
- data_preview = gr.Dataframe(label="First 20 rows of the generated training data (X features + y)", wrap=True)
 
268
 
269
  with gr.Tab("Raw Loop Snippet"):
270
  snippet = gr.Code(label="Your original loop (as runnable reference)", language="python")
@@ -272,7 +303,7 @@ Use **Data Preview** to see the first rows of the generated dataset.
272
  run_btn.click(
273
  fn=train_raw_pytorch,
274
  inputs=[n_samples, noise_std, lr, batch_size, epochs, seed, device_choice],
275
- outputs=[loss_img, weights_df, summary_txt, snippet, data_preview],
276
  )
277
 
278
  if __name__ == "__main__":
 
1
  import io
2
  import random
3
+ import tempfile
4
  from dataclasses import dataclass
5
 
6
  import gradio as gr
 
65
  return image
66
 
67
 
68
def build_full_dataset_df(X_train, y_train, X_val, y_val) -> pd.DataFrame:
    """Combine the train and validation splits into one labelled DataFrame.

    Each split becomes a block of rows with one column per feature
    (``x0``, ``x1``, ...), a ``y`` target column, and a ``split`` column set to
    ``"train"`` or ``"val"``. Training rows come first and the index is
    renumbered from 0 across the combined frame.

    Args:
        X_train: Training features, a 2-D tensor (rows x features).
        y_train: Training targets; flattened to one value per row.
        X_val: Validation features with the same number of columns as
            ``X_train``.
        y_val: Validation targets; flattened to one value per row.

    Returns:
        A single DataFrame with columns ``x0..x{k-1}``, ``y``, ``split``.
    """
    # Derive the feature count from the data rather than hard-coding 10, so
    # the helper keeps working if the dataset spec changes its n_features.
    n_features = X_train.shape[1]
    cols = [f"x{i}" for i in range(n_features)]

    def _split_frame(X, y, label: str) -> pd.DataFrame:
        """Build the DataFrame for one split and tag it with *label*."""
        # detach() is a no-op for plain tensors but avoids a RuntimeError when
        # a tensor that tracks gradients is passed in.
        frame = pd.DataFrame(X.detach().cpu().numpy(), columns=cols)
        frame["y"] = y.detach().cpu().numpy().reshape(-1)
        frame["split"] = label
        return frame

    return pd.concat(
        [
            _split_frame(X_train, y_train, "train"),
            _split_frame(X_val, y_val, "val"),
        ],
        axis=0,
        ignore_index=True,
    )
82
+
83
+
84
def save_df_to_temp_csv(df: pd.DataFrame) -> str:
    """Write *df* to a new temporary CSV file and return its path.

    The file is created with ``delete=False`` so it outlives this call; the
    caller (e.g. a Gradio download component) is handed the path. The index
    is not written.

    Args:
        df: DataFrame to serialise.

    Returns:
        Filesystem path of the CSV file that was written.
    """
    # Use the temp file only to reserve a unique path, and close the handle
    # before writing. Leaving it open leaks a descriptor per call and, on
    # Windows, prevents pandas from reopening the same path for writing.
    with tempfile.NamedTemporaryFile(
        delete=False,
        suffix=".csv",
        prefix="synthetic_linear_regression_",
    ) as tmp:
        csv_path = tmp.name

    df.to_csv(csv_path, index=False)
    return csv_path
89
+
90
+
91
  def train_raw_pytorch(
92
  n_samples: int,
93
  noise_std: float,
 
103
  spec = DataSpec(n_samples=n_samples, n_features=10, noise_std=noise_std, train_frac=0.8)
104
  X_train, y_train, X_val, y_val, w_true, b_true = make_synthetic_regression(spec, seed=seed)
105
 
106
+ # Full dataset CSV (train + val with split column)
107
+ full_df = build_full_dataset_df(X_train, y_train, X_val, y_val).round(4)
108
+ csv_path = save_df_to_temp_csv(full_df)
109
+
110
  # Data preview (first 20 rows from training split)
111
  preview_n = min(20, X_train.shape[0])
112
  df_preview = pd.DataFrame(
 
233
  f"Final train loss: {train_losses[-1]:.6f}\n"
234
  f"Final val loss: {val_losses[-1]:.6f}\n\n"
235
  f"True bias (b_true): {float(b_true.item()):.4f}\n"
236
+ f"Learned bias (b_learned): {float(b_learned.item()):.4f}\n\n"
237
+ f"Dataset CSV includes columns: x0..x9, y, split(train/val)\n"
238
  )
239
 
240
  raw_loop_snippet = """# Raw PyTorch: requires manual training loop
 
253
  optimizer.step()
254
  """
255
 
256
+ # Added csv_path as downloadable artifact
257
+ return loss_plot, df_weights, summary, raw_loop_snippet, df_preview, csv_path
258
 
259
 
260
  with gr.Blocks(title="Raw PyTorch Training Loop Demo") as demo:
 
268
  y = Xw + b + \\text{noise}
269
  \]
270
 
271
+ Go to **Data Preview** to see sample rows and **download the full dataset** as CSV.
272
  """
273
  )
274
 
 
291
  with gr.Tab("Outputs"):
292
  loss_img = gr.Image(label="Loss Curve", type="numpy")
293
  weights_df = gr.Dataframe(label="Weights: True vs Learned (sorted by abs error)", wrap=True)
294
+ summary_txt = gr.Textbox(label="Summary", lines=10)
295
 
296
  with gr.Tab("Data Preview"):
297
+ data_preview = gr.Dataframe(label="First 20 rows of generated TRAIN data (X features + y)", wrap=True)
298
+ download_file = gr.File(label="Download full dataset CSV (train + val)")
299
 
300
  with gr.Tab("Raw Loop Snippet"):
301
  snippet = gr.Code(label="Your original loop (as runnable reference)", language="python")
 
303
  run_btn.click(
304
  fn=train_raw_pytorch,
305
  inputs=[n_samples, noise_std, lr, batch_size, epochs, seed, device_choice],
306
+ outputs=[loss_img, weights_df, summary_txt, snippet, data_preview, download_file],
307
  )
308
 
309
  if __name__ == "__main__":