# Hugging Face Space: raw PyTorch training-loop demo (page-status residue removed)
import io
import os
import random
import tempfile
from dataclasses import dataclass

import gradio as gr
import matplotlib
matplotlib.use("Agg")  # headless-friendly for Hugging Face Spaces; must run before pyplot import
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
@dataclass
class DataSpec:
    """Configuration for the synthetic linear-regression dataset.

    The ``@dataclass`` decorator is required: ``train_raw_pytorch`` constructs
    this with keyword arguments (``DataSpec(n_samples=..., ...)``), which a
    plain class with class attributes would reject with a TypeError.
    """

    # Total number of generated rows (train + val combined).
    n_samples: int = 1024
    # Number of feature columns in X.
    n_features: int = 10
    # Std-dev of the Gaussian noise added to y.
    noise_std: float = 0.3
    # Fraction of rows assigned to the training split.
    train_frac: float = 0.8
def set_seed(seed: int) -> None:
    """Seed every RNG this app relies on (Python, NumPy, PyTorch CPU and CUDA)."""
    # torch.cuda.manual_seed_all is a safe no-op when CUDA is unavailable.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)
def make_synthetic_regression(spec: DataSpec, seed: int = 42):
    """Generate a noisy linear dataset: y = X @ w_true + b_true + noise.

    Shapes:
        X: (n_samples, n_features)
        y: (n_samples, 1)

    Returns:
        (X_train, y_train, X_val, y_val, w_true, b_true) — the true
        parameters are included so learners can compare against them.
    """
    set_seed(seed)

    # Ground-truth parameters (drawn first so the RNG stream is stable).
    w_true = 2.0 * torch.randn(spec.n_features, 1)
    b_true = 0.5 * torch.randn(1)

    X = torch.randn(spec.n_samples, spec.n_features)
    y = X @ w_true + b_true + spec.noise_std * torch.randn(spec.n_samples, 1)

    # Leading train_frac of the rows -> train, remainder -> validation.
    split = int(spec.n_samples * spec.train_frac)
    return X[:split], y[:split], X[split:], y[split:], w_true, b_true
def fig_to_image(fig) -> np.ndarray:
    """Render a matplotlib figure to an in-memory PNG, return it as an array.

    The figure is closed after rendering so repeated runs do not accumulate
    open figures.
    """
    with io.BytesIO() as buffer:
        fig.savefig(buffer, format="png", bbox_inches="tight", dpi=160)
        plt.close(fig)
        buffer.seek(0)
        # plt.imread copies into a fresh array, so the buffer may be closed.
        return plt.imread(buffer)
def build_full_dataset_df(X_train, y_train, X_val, y_val) -> pd.DataFrame:
    """Combine train/val tensors into one DataFrame with a 'split' column.

    Columns: x0..x{d-1}, y, split ('train'/'val'), where d is the number of
    feature columns in X_train. Deriving d from the data (instead of the
    previous hard-coded 10) keeps this helper correct if DataSpec.n_features
    ever changes.
    """
    cols = [f"x{i}" for i in range(X_train.shape[1])]

    def _split_df(X, y, split_name):
        # One split worth of rows, tagged so the exported CSV is self-describing.
        df = pd.DataFrame(X.cpu().numpy(), columns=cols)
        df["y"] = y.cpu().numpy().reshape(-1)
        df["split"] = split_name
        return df

    return pd.concat(
        [_split_df(X_train, y_train, "train"), _split_df(X_val, y_val, "val")],
        axis=0,
        ignore_index=True,
    )
def save_df_to_temp_csv(df: pd.DataFrame) -> str:
    """Write *df* to a temp CSV file and return its path (for Gradio download).

    Uses mkstemp + an explicit close instead of NamedTemporaryFile so the
    OS-level handle is released before pandas reopens the path — the previous
    version leaked a file descriptor per call and would fail on Windows,
    where an open NamedTemporaryFile cannot be reopened by name.
    """
    fd, path = tempfile.mkstemp(suffix=".csv", prefix="synthetic_linear_regression_")
    os.close(fd)  # release the handle; pandas writes via the path below
    df.to_csv(path, index=False)
    return path
def train_raw_pytorch(
    n_samples: int,
    noise_std: float,
    lr: float,
    batch_size: int,
    epochs: int,
    seed: int,
    device_choice: str,
):
    """Train an nn.Linear model on synthetic data with a hand-written loop.

    Parameters mirror the Gradio controls one-to-one. Returns the 6-tuple
    consumed by the UI: (loss-curve image, weights DataFrame, summary text,
    code snippet string, data-preview DataFrame, CSV file path).
    """
    # Gradio number widgets can deliver floats (e.g. seed from gr.Number,
    # epochs from gr.Slider); normalize to the types range()/DataLoader need.
    n_samples = int(n_samples)
    batch_size = int(batch_size)
    epochs = int(epochs)
    seed = int(seed)
    lr = float(lr)
    noise_std = float(noise_std)

    # ----------------------------
    # 1) Data
    # ----------------------------
    spec = DataSpec(n_samples=n_samples, n_features=10, noise_std=noise_std, train_frac=0.8)
    X_train, y_train, X_val, y_val, w_true, b_true = make_synthetic_regression(spec, seed=seed)

    # Full dataset CSV (train + val with split column), offered as a download.
    full_df = build_full_dataset_df(X_train, y_train, X_val, y_val).round(4)
    csv_path = save_df_to_temp_csv(full_df)

    # Data preview: first 20 rows (or fewer) from the training split.
    preview_n = min(20, X_train.shape[0])
    feature_cols = [f"x{i}" for i in range(spec.n_features)]
    df_preview = pd.DataFrame(X_train[:preview_n].cpu().numpy(), columns=feature_cols)
    df_preview["y"] = y_train[:preview_n].cpu().numpy().reshape(-1)
    df_preview = df_preview.round(4)

    train_loader = DataLoader(
        TensorDataset(X_train, y_train),
        batch_size=batch_size,
        shuffle=True,
        drop_last=False,
    )
    val_loader = DataLoader(
        TensorDataset(X_val, y_val),
        batch_size=batch_size,
        shuffle=False,
        drop_last=False,
    )

    # ----------------------------
    # 2) Model, optimizer, loss
    # ----------------------------
    # spec.n_features (not a second hard-coded 10) keeps the model's input
    # dimension in lockstep with the data generator.
    model = nn.Linear(spec.n_features, 1)
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    # Device handling: CPU by default, CUDA only if both available and selected.
    if device_choice == "cuda" and torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    model.to(device)
    w_true = w_true.to(device)
    b_true = b_true.to(device)

    # ----------------------------
    # 3) Raw PyTorch training loop
    # ----------------------------
    train_losses = []
    val_losses = []
    for epoch in range(1, epochs + 1):
        # ---- training
        model.train()
        running = 0.0
        seen = 0
        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)

            optimizer.zero_grad()        # (1) reset grads
            y_pred = model(x)            # (2) forward
            loss = loss_fn(y_pred, y)    # (3) compute loss
            loss.backward()              # (4) backprop
            optimizer.step()             # (5) update weights

            # Weight each batch loss by its size so the epoch average is the
            # true per-sample mean even when the last batch is smaller.
            batch_size_actual = x.size(0)
            running += loss.item() * batch_size_actual
            seen += batch_size_actual
        train_losses.append(running / max(seen, 1))

        # ---- validation (eval mode, no grad tracking)
        model.eval()
        running = 0.0
        seen = 0
        with torch.no_grad():
            for x, y in val_loader:
                x = x.to(device)
                y = y.to(device)
                loss = loss_fn(model(x), y)
                batch_size_actual = x.size(0)
                running += loss.item() * batch_size_actual
                seen += batch_size_actual
        val_losses.append(running / max(seen, 1))

    # ----------------------------
    # 4) Results for students
    # ----------------------------
    # Loss curve plot.
    fig = plt.figure()
    plt.plot(range(1, epochs + 1), train_losses, marker="o", label="train")
    plt.plot(range(1, epochs + 1), val_losses, marker="o", label="val")
    plt.xlabel("Epoch")
    plt.ylabel("MSE Loss")
    plt.title("Raw PyTorch Training Loop (Linear Regression)")
    plt.grid(True, alpha=0.3)
    plt.legend()
    loss_plot = fig_to_image(fig)

    # Learned parameters vs. true parameters.
    with torch.no_grad():
        w_learned = model.weight.detach().view(-1, 1)  # shape (n_features, 1)
        b_learned = model.bias.detach().view(1)

    rows = [
        {
            "feature": f"x{i}",
            "w_true": round(float(w_true[i].item()), 4),
            "w_learned": round(float(w_learned[i].item()), 4),
            "abs_error": round(abs(float(w_true[i].item()) - float(w_learned[i].item())), 4),
        }
        for i in range(spec.n_features)
    ]
    # Sort worst-fit features first so mismatches are easy to spot.
    df_weights = (
        pd.DataFrame(rows)
        .sort_values("abs_error", ascending=False)
        .reset_index(drop=True)
    )

    summary = (
        f"Device: {device}\n"
        f"Final train loss: {train_losses[-1]:.6f}\n"
        f"Final val loss: {val_losses[-1]:.6f}\n\n"
        f"True bias (b_true): {float(b_true.item()):.4f}\n"
        f"Learned bias (b_learned): {float(b_learned.item()):.4f}\n\n"
        f"Dataset CSV includes columns: x0..x9, y, split(train/val)\n"
    )

    raw_loop_snippet = """# Raw PyTorch: requires manual training loop
import torch
import torch.nn as nn
model = nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
loss_fn = nn.MSELoss()
for x, y in dataloader:
    optimizer.zero_grad()
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    loss.backward()
    optimizer.step()
"""

    # csv_path is exposed as a downloadable artifact via gr.File.
    return loss_plot, df_weights, summary, raw_loop_snippet, df_preview, csv_path
# Gradio UI: controls on top, three result tabs below. The click handler's
# inputs list must match train_raw_pytorch's signature order, and the outputs
# list must match its 6-tuple return order.
with gr.Blocks(title="Raw PyTorch Training Loop Demo") as demo:
    gr.Markdown(
        """
# Raw PyTorch Training Loop (Linear Regression)
This Space generates **synthetic data** each run:
\[
y = Xw + b + \\text{noise}
\]
Go to **Data Preview** to see sample rows and **download the full dataset** as CSV.
"""
    )
    # ---- dataset controls
    with gr.Row():
        n_samples = gr.Slider(256, 8192, value=1024, step=256, label="Number of samples")
        noise_std = gr.Slider(0.0, 2.0, value=0.3, step=0.05, label="Noise (std dev)")
    # ---- optimization controls
    with gr.Row():
        lr = gr.Slider(1e-4, 1.0, value=0.01, step=1e-4, label="Learning rate (SGD)")
        batch_size = gr.Dropdown([16, 32, 64, 128, 256], value=64, label="Batch size")
    with gr.Row():
        epochs = gr.Slider(1, 50, value=10, step=1, label="Epochs")
        seed = gr.Number(value=42, precision=0, label="Random seed")
        device_choice = gr.Radio(["cpu", "cuda"], value="cpu", label="Device (cuda only if available)")

    run_btn = gr.Button("Train Model", variant="primary")

    # ---- result tabs (ordering here mirrors the return tuple of train_raw_pytorch)
    with gr.Tab("Outputs"):
        loss_img = gr.Image(label="Loss Curve", type="numpy")
        weights_df = gr.Dataframe(label="Weights: True vs Learned (sorted by abs error)", wrap=True)
        summary_txt = gr.Textbox(label="Summary", lines=10)
    with gr.Tab("Data Preview"):
        data_preview = gr.Dataframe(label="First 20 rows of generated TRAIN data (X features + y)", wrap=True)
        download_file = gr.File(label="Download full dataset CSV (train + val)")
    with gr.Tab("Raw Loop Snippet"):
        snippet = gr.Code(label="Your original loop (as runnable reference)", language="python")

    run_btn.click(
        fn=train_raw_pytorch,
        inputs=[n_samples, noise_std, lr, batch_size, epochs, seed, device_choice],
        outputs=[loss_img, weights_df, summary_txt, snippet, data_preview, download_file],
    )

# Local entry point; on Hugging Face Spaces the platform launches `demo` itself.
if __name__ == "__main__":
    demo.launch()