# py_lightning / app.py
# eaglelandsonce's picture
# Update app.py
# c21e478 verified
import io
import random
import tempfile
from dataclasses import dataclass
import gradio as gr
import matplotlib
matplotlib.use("Agg") # headless-friendly for Hugging Face Spaces
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
@dataclass
class DataSpec:
    """Settings that describe one synthetic linear-regression dataset."""

    # Total number of rows generated (train and validation together).
    n_samples: int = 1024
    # Number of input columns in X.
    n_features: int = 10
    # Scale applied to the standard-normal noise that is added to y.
    noise_std: float = 0.3
    # Fraction of rows that go to the training split; the rest is validation.
    train_frac: float = 0.8
def set_seed(seed: int) -> None:
    """Seed Python's `random`, NumPy and PyTorch (CPU and all CUDA devices)."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
def make_synthetic_regression(spec: DataSpec, seed: int = 42):
    """
    Build a seeded linear-regression dataset and split it into train/val.

    Targets are generated as:
        y = X @ w_true + b_true + noise

    Args:
        spec: Dataset settings (row count, feature count, noise scale,
            training fraction).
        seed: Seed passed to `set_seed` before any tensor is drawn.

    Returns:
        Tuple `(X_train, y_train, X_val, y_val, w_true, b_true)` where
        X_* has shape (rows, n_features), y_* has shape (rows, 1),
        w_true has shape (n_features, 1) and b_true has shape (1,).
    """
    set_seed(seed)

    n_rows = spec.n_samples
    n_cols = spec.n_features

    # Ground-truth parameters that the learned model can be compared against.
    # The draw order (weights, bias, inputs, noise) fixes the generated values
    # for a given seed, so it must not be rearranged.
    w_true = 2.0 * torch.randn(n_cols, 1)
    b_true = 0.5 * torch.randn(1)
    X = torch.randn(n_rows, n_cols)
    noise = spec.noise_std * torch.randn(n_rows, 1)

    y = torch.matmul(X, w_true) + b_true + noise

    # The first `cut` rows are the training split, the remainder is validation.
    cut = int(n_rows * spec.train_frac)
    X_train, X_val = X[:cut], X[cut:]
    y_train, y_val = y[:cut], y[cut:]

    return (X_train, y_train, X_val, y_val, w_true, b_true)
def fig_to_image(fig) -> np.ndarray:
    """
    Render a matplotlib figure to a numpy image array and close the figure.

    The figure is saved as a PNG into an in-memory buffer and read back with
    `plt.imread`. For PNG data matplotlib returns a float array with values in
    [0, 1]; the array normally has four channels (RGBA), not three.

    Args:
        fig: The matplotlib figure to render. It is always closed before this
            function returns, including when rendering fails.

    Returns:
        The decoded image as a numpy array.
    """
    buf = io.BytesIO()
    try:
        fig.savefig(buf, format="png", bbox_inches="tight", dpi=160)
        buf.seek(0)
        return plt.imread(buf, format="png")
    finally:
        # Close in `finally` so a failed save/decode does not leak the figure
        # in pyplot's global figure registry.
        plt.close(fig)
def build_full_dataset_df(X_train, y_train, X_val, y_val) -> pd.DataFrame:
    """
    Combine the train and validation tensors into one labelled DataFrame.

    Args:
        X_train: Training inputs, shape (n_train, n_features).
        y_train: Training targets with n_train elements.
        X_val: Validation inputs, shape (n_val, n_features).
        y_val: Validation targets with n_val elements.

    Returns:
        DataFrame with columns x0..x{n_features-1}, "y" and "split", where
        "split" is "train" or "val". Training rows come first and the index
        is renumbered from 0.
    """
    # Take the column count from the data instead of assuming 10 features.
    cols = [f"x{i}" for i in range(X_train.shape[1])]

    def _split_frame(X, y, split_name: str) -> pd.DataFrame:
        # One split as a frame: feature columns, then target, then split label.
        frame = pd.DataFrame(X.detach().cpu().numpy(), columns=cols)
        frame["y"] = y.detach().cpu().numpy().reshape(-1)
        frame["split"] = split_name
        return frame

    return pd.concat(
        [_split_frame(X_train, y_train, "train"), _split_frame(X_val, y_val, "val")],
        axis=0,
        ignore_index=True,
    )
def save_df_to_temp_csv(df: pd.DataFrame) -> str:
    """
    Write `df` to a new temporary CSV file and return its path.

    The file is created with `delete=False`, so it stays on disk after this
    function returns (the path is handed to Gradio for download). The caller
    is responsible for removing it if cleanup is needed.

    Args:
        df: DataFrame to export; the index is not written.

    Returns:
        Path of the CSV file that was written.
    """
    # Only reserve a unique file name here and close the handle right away:
    # leaving it open leaks a descriptor per call and prevents reopening the
    # file by name on Windows.
    with tempfile.NamedTemporaryFile(
        delete=False,
        suffix=".csv",
        prefix="synthetic_linear_regression_",
    ) as tmp:
        csv_path = tmp.name

    df.to_csv(csv_path, index=False)
    return csv_path
def train_raw_pytorch(
    n_samples: int,
    noise_std: float,
    lr: float,
    batch_size: int,
    epochs: int,
    seed: int,
    device_choice: str,
):
    """
    Generate synthetic data, fit `nn.Linear` with a hand-written loop, report.

    Args:
        n_samples: Total number of rows to generate (train + validation).
        noise_std: Scale of the noise added to the targets.
        lr: Learning rate for plain SGD.
        batch_size: Mini-batch size for both data loaders.
        epochs: Number of passes over the training split.
        seed: Seed used for data generation (it also fixes the global RNG
            state that model initialisation and shuffling draw from).
        device_choice: "cuda" to train on the GPU when one is available;
            anything else, or an unavailable GPU, falls back to the CPU.

    Returns:
        Tuple `(loss_plot, df_weights, summary, raw_loop_snippet, df_preview,
        csv_path)`:
            loss_plot: Image array of the train/val loss curves.
            df_weights: True vs. learned weight per feature, sorted by
                absolute error (largest first).
            summary: Multi-line text with device, final losses and biases.
            raw_loop_snippet: Reference source code of a minimal raw loop.
            df_preview: First rows (up to 20) of the training split.
            csv_path: Path of the temp CSV holding the full dataset.
    """
    # Whole-number inputs may arrive from the UI as floats; tensor shapes,
    # DataLoader batch sizes and range() all require real ints.
    n_samples = int(n_samples)
    batch_size = int(batch_size)
    epochs = int(epochs)
    seed = int(seed)

    # ----------------------------
    # 1) Data
    # ----------------------------
    spec = DataSpec(n_samples=n_samples, n_features=10, noise_std=noise_std, train_frac=0.8)
    # Single source of truth for the feature count used below.
    n_features = spec.n_features
    feature_names = [f"x{i}" for i in range(n_features)]

    X_train, y_train, X_val, y_val, w_true, b_true = make_synthetic_regression(spec, seed=seed)

    # Full dataset CSV (train + val with split column)
    full_df = build_full_dataset_df(X_train, y_train, X_val, y_val).round(4)
    csv_path = save_df_to_temp_csv(full_df)

    # Data preview (first 20 rows from training split)
    preview_n = min(20, X_train.shape[0])
    df_preview = pd.DataFrame(X_train[:preview_n].cpu().numpy(), columns=feature_names)
    df_preview["y"] = y_train[:preview_n].cpu().numpy().reshape(-1)
    df_preview = df_preview.round(4)

    train_loader = DataLoader(
        TensorDataset(X_train, y_train),
        batch_size=batch_size,
        shuffle=True,
        drop_last=False,
    )
    val_loader = DataLoader(
        TensorDataset(X_val, y_val),
        batch_size=batch_size,
        shuffle=False,
        drop_last=False,
    )

    # ----------------------------
    # 2) Model, optimizer, loss
    # ----------------------------
    # Device handling (CPU by default; CUDA if available & selected)
    if device_choice == "cuda" and torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    model = nn.Linear(n_features, 1).to(device)
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    w_true = w_true.to(device)
    b_true = b_true.to(device)

    # ----------------------------
    # 3) Raw PyTorch training loop
    # ----------------------------
    train_losses = []
    val_losses = []

    for _ in range(epochs):
        # ---- training
        model.train()
        running = 0.0
        seen = 0
        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)

            optimizer.zero_grad()        # (1) reset grads
            y_pred = model(x)            # (2) forward
            loss = loss_fn(y_pred, y)    # (3) compute loss
            loss.backward()              # (4) backprop
            optimizer.step()             # (5) update weights

            # Weight each batch mean by its size so a smaller final batch
            # does not skew the epoch average.
            batch_size_actual = x.size(0)
            running += loss.item() * batch_size_actual
            seen += batch_size_actual
        train_losses.append(running / max(seen, 1))

        # ---- validation
        model.eval()
        running = 0.0
        seen = 0
        with torch.no_grad():
            for x, y in val_loader:
                x = x.to(device)
                y = y.to(device)
                y_pred = model(x)
                loss = loss_fn(y_pred, y)
                batch_size_actual = x.size(0)
                running += loss.item() * batch_size_actual
                seen += batch_size_actual
        val_losses.append(running / max(seen, 1))

    # ----------------------------
    # 4) Results for students
    # ----------------------------
    # Loss curve plot. Draw on an explicit Axes rather than pyplot's implicit
    # "current figure", which is shared global state and can be switched by
    # another request running at the same time.
    epoch_axis = range(1, epochs + 1)
    fig, ax = plt.subplots()
    ax.plot(epoch_axis, train_losses, marker="o", label="train")
    ax.plot(epoch_axis, val_losses, marker="o", label="val")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("MSE Loss")
    ax.set_title("Raw PyTorch Training Loop (Linear Regression)")
    ax.grid(True, alpha=0.3)
    ax.legend()
    loss_plot = fig_to_image(fig)

    # Learned parameters vs. true parameters
    w_true_vals = w_true.detach().view(-1).tolist()
    w_learned_vals = model.weight.detach().view(-1).tolist()
    b_learned = model.bias.detach().view(1)

    rows = [
        {
            "feature": name,
            "w_true": round(w_t, 4),
            "w_learned": round(w_l, 4),
            # Error is computed on the unrounded values, then rounded.
            "abs_error": round(abs(w_t - w_l), 4),
        }
        for name, w_t, w_l in zip(feature_names, w_true_vals, w_learned_vals)
    ]
    df_weights = (
        pd.DataFrame(rows)
        .sort_values("abs_error", ascending=False)
        .reset_index(drop=True)
    )

    summary = (
        f"Device: {device}\n"
        f"Final train loss: {train_losses[-1]:.6f}\n"
        f"Final val loss: {val_losses[-1]:.6f}\n\n"
        f"True bias (b_true): {float(b_true.item()):.4f}\n"
        f"Learned bias (b_learned): {float(b_learned.item()):.4f}\n\n"
        f"Dataset CSV includes columns: x0..x{n_features - 1}, y, split(train/val)\n"
    )

    # Shown verbatim in the UI as a reference; the loop body must be indented
    # for the snippet to be valid Python.
    raw_loop_snippet = """# Raw PyTorch: requires manual training loop
import torch
import torch.nn as nn
model = nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
loss_fn = nn.MSELoss()
for x, y in dataloader:
    optimizer.zero_grad()
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    loss.backward()
    optimizer.step()
"""

    # Added csv_path as downloadable artifact
    return loss_plot, df_weights, summary, raw_loop_snippet, df_preview, csv_path
# Gradio UI: hyper-parameter controls on top, results in tabs below, and one
# button that runs `train_raw_pytorch` and fills every output component.
with gr.Blocks(title="Raw PyTorch Training Loop Demo") as demo:
    # The formula uses `$$ ... $$`, Gradio Markdown's default LaTeX delimiter.
    # Backslashes are doubled so the string contains no invalid escapes.
    gr.Markdown(
        """
        # Raw PyTorch Training Loop (Linear Regression)

        This Space generates **synthetic data** each run:

        $$
        y = Xw + b + \\text{noise}
        $$

        Go to **Data Preview** to see sample rows and **download the full dataset** as CSV.
        """
    )

    # --- Inputs: dataset size and noise
    with gr.Row():
        n_samples = gr.Slider(256, 8192, value=1024, step=256, label="Number of samples")
        noise_std = gr.Slider(0.0, 2.0, value=0.3, step=0.05, label="Noise (std dev)")

    # --- Inputs: optimizer settings
    with gr.Row():
        lr = gr.Slider(1e-4, 1.0, value=0.01, step=1e-4, label="Learning rate (SGD)")
        batch_size = gr.Dropdown([16, 32, 64, 128, 256], value=64, label="Batch size")

    # --- Inputs: run length, reproducibility, device
    with gr.Row():
        epochs = gr.Slider(1, 50, value=10, step=1, label="Epochs")
        seed = gr.Number(value=42, precision=0, label="Random seed")
        device_choice = gr.Radio(["cpu", "cuda"], value="cpu", label="Device (cuda only if available)")

    run_btn = gr.Button("Train Model", variant="primary")

    # --- Outputs
    with gr.Tab("Outputs"):
        loss_img = gr.Image(label="Loss Curve", type="numpy")
        weights_df = gr.Dataframe(label="Weights: True vs Learned (sorted by abs error)", wrap=True)
        summary_txt = gr.Textbox(label="Summary", lines=10)

    with gr.Tab("Data Preview"):
        data_preview = gr.Dataframe(label="First 20 rows of generated TRAIN data (X features + y)", wrap=True)
        download_file = gr.File(label="Download full dataset CSV (train + val)")

    with gr.Tab("Raw Loop Snippet"):
        snippet = gr.Code(label="Your original loop (as runnable reference)", language="python")

    # The order of `inputs` matches the parameters of `train_raw_pytorch`, and
    # the order of `outputs` matches the tuple it returns.
    run_btn.click(
        fn=train_raw_pytorch,
        inputs=[n_samples, noise_std, lr, batch_size, epochs, seed, device_choice],
        outputs=[loss_img, weights_df, summary_txt, snippet, data_preview, download_file],
    )

if __name__ == "__main__":
    demo.launch()