# app.py
"""Gradio Space: train an nn.Linear(10, 1) model on a fresh synthetic regression
dataset using a manual ("raw") PyTorch training loop, and visualize the results."""

import os
import tempfile
import uuid

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset


def _pick_device(device_choice: str) -> torch.device:
    """Resolve the UI device choice ("auto" | "cpu" | "cuda") to a torch.device.

    "cuda" (and "auto") silently fall back to CPU when no GPU is available, so
    the app never crashes on CPU-only Spaces hardware.
    """
    if device_choice == "cuda":
        return torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if device_choice == "cpu":
        return torch.device("cpu")
    # auto
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")


def make_synthetic_regression(n_samples: int, noise_std: float, seed: int):
    """Generate a seeded linear-regression dataset with 10 features.

    X shape: (n_samples, 10)
    y = X @ w_true + b_true + noise

    Returns:
        (X_train, y_train, X_val, y_val, w_true, b_true, df, df_train_preview)
        where df is the full dataset as a DataFrame (columns x0..x9, y, split)
        and df_train_preview is the first 20 TRAIN rows.
    """
    n_features = 10
    # A dedicated Generator keeps data generation reproducible without
    # touching torch's global RNG state.
    g = torch.Generator().manual_seed(int(seed))
    X = torch.randn(n_samples, n_features, generator=g)
    w_true = torch.randn(n_features, 1, generator=g)
    b_true = torch.randn(1, generator=g)
    noise = noise_std * torch.randn(n_samples, 1, generator=g)
    y = X @ w_true + b_true + noise

    # 80/20 split (shuffled)
    idx = torch.randperm(n_samples, generator=g)
    n_train = int(round(0.8 * n_samples))
    train_idx = idx[:n_train]
    val_idx = idx[n_train:]
    X_train, y_train = X[train_idx], y[train_idx]
    X_val, y_val = X[val_idx], y[val_idx]

    # Full dataframe for CSV download
    cols = [f"x{i}" for i in range(n_features)]
    df = pd.DataFrame(X.numpy(), columns=cols)
    df["y"] = y.numpy().reshape(-1)
    split = np.array(["val"] * n_samples, dtype=object)
    split[train_idx.numpy()] = "train"
    df["split"] = split

    # Data preview: first 20 TRAIN rows
    df_train_preview = df[df["split"] == "train"].head(20).reset_index(drop=True)

    return (X_train, y_train, X_val, y_val, w_true, b_true, df, df_train_preview)


def train_raw_pytorch_loop(
    X_train: torch.Tensor,
    y_train: torch.Tensor,
    X_val: torch.Tensor,
    y_val: torch.Tensor,
    lr: float,
    batch_size: int,
    epochs: int,
    seed: int,
    device: torch.device,
):
    """Train nn.Linear(10, 1) with plain SGD + MSE using a manual batch loop.

    Returns:
        (model, train_losses, val_losses) — per-epoch sample-weighted mean MSE.
    """
    # Ensure deterministic-ish behavior for model init (offset keeps the init
    # stream independent of the data-generation stream for the same seed).
    torch.manual_seed(int(seed) + 12345)

    model = nn.Linear(10, 1).to(device)
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    train_loader = DataLoader(
        TensorDataset(X_train, y_train),
        batch_size=batch_size,
        shuffle=True,
        drop_last=False,
    )
    val_loader = DataLoader(
        TensorDataset(X_val, y_val),
        batch_size=batch_size,
        shuffle=False,
        drop_last=False,
    )

    train_losses = []
    val_losses = []

    for _epoch in range(epochs):
        # ---- TRAIN ----
        model.train()
        running = 0.0
        n_seen = 0
        for xb, yb in train_loader:
            xb = xb.to(device)
            yb = yb.to(device)

            # Manual training loop steps:
            optimizer.zero_grad()          # 1) zero_grad
            y_pred = model(xb)             # 2) forward
            loss = loss_fn(y_pred, yb)     # 3) loss
            loss.backward()                # 4) backward
            optimizer.step()               # 5) step

            # Weight by batch size so the last (possibly smaller) batch
            # doesn't skew the epoch mean.
            bs = xb.shape[0]
            running += loss.item() * bs
            n_seen += bs
        train_losses.append(running / max(1, n_seen))

        # ---- VAL ----
        model.eval()
        running = 0.0
        n_seen = 0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb = xb.to(device)
                yb = yb.to(device)
                y_pred = model(xb)
                loss = loss_fn(y_pred, yb)
                bs = xb.shape[0]
                running += loss.item() * bs
                n_seen += bs
        val_losses.append(running / max(1, n_seen))

    return model, train_losses, val_losses


def build_weight_comparison(w_true: torch.Tensor, b_true: torch.Tensor, model: nn.Linear):
    """Tabulate true vs. learned weights/bias with absolute errors.

    Returns a DataFrame with columns: param, true, learned, abs_error.
    """
    w_learned = model.weight.detach().cpu().numpy().reshape(-1)
    b_learned = model.bias.detach().cpu().item()  # .item(): idiomatic scalar extraction
    w_true_np = w_true.detach().cpu().numpy().reshape(-1)
    b_true_np = b_true.detach().cpu().item()

    rows = []
    for i in range(10):
        rows.append(
            {
                "param": f"w[{i}] (x{i})",
                "true": float(w_true_np[i]),
                "learned": float(w_learned[i]),
                "abs_error": float(abs(w_true_np[i] - w_learned[i])),
            }
        )
    rows.append(
        {
            "param": "bias (b)",
            "true": b_true_np,
            "learned": b_learned,
            "abs_error": float(abs(b_true_np - b_learned)),
        }
    )
    return pd.DataFrame(rows)


def make_loss_plot(train_losses, val_losses):
    """Plot train/val MSE loss curves against epoch number; return the Figure."""
    fig, ax = plt.subplots()
    xs = np.arange(1, len(train_losses) + 1)
    ax.plot(xs, train_losses, label="train")
    ax.plot(xs, val_losses, label="val")
    ax.set_title("Raw PyTorch Training Loop (Linear Regression)")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("MSE Loss")
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    return fig


def run_experiment(n_samples, noise_std, lr, batch_size, epochs, seed, device_choice):
    """End-to-end pipeline for one UI run: generate data, train, summarize.

    Returns:
        (loss figure, weight-comparison DataFrame, text summary,
         train-preview DataFrame, path to the full-dataset CSV)
    """
    # sanitize — Gradio sliders/numbers may hand back floats/strings
    n_samples = int(n_samples)
    batch_size = int(batch_size)
    epochs = int(epochs)
    seed = int(seed)
    noise_std = float(noise_std)
    lr = float(lr)

    device = _pick_device(device_choice)

    X_train, y_train, X_val, y_val, w_true, b_true, df_full, df_train_preview = make_synthetic_regression(
        n_samples=n_samples,
        noise_std=noise_std,
        seed=seed,
    )

    model, train_losses, val_losses = train_raw_pytorch_loop(
        X_train=X_train,
        y_train=y_train,
        X_val=X_val,
        y_val=y_val,
        lr=lr,
        batch_size=batch_size,
        epochs=epochs,
        seed=seed,
        device=device,
    )

    fig = make_loss_plot(train_losses, val_losses)
    w_table = build_weight_comparison(w_true, b_true, model)

    # Save dataset CSV for download — unique name so concurrent runs don't clash
    out_path = os.path.join(
        tempfile.gettempdir(),
        f"synthetic_regression_{uuid.uuid4().hex}.csv",
    )
    df_full.to_csv(out_path, index=False)

    summary = (
        "Raw PyTorch loop steps used each batch:\n"
        "  optimizer.zero_grad() -> model(x) -> loss_fn(...) -> loss.backward() -> optimizer.step()\n\n"
        f"Device used: {device.type}\n"
        f"Samples: {n_samples} (train={int(round(0.8*n_samples))}, val={n_samples-int(round(0.8*n_samples))})\n"
        f"Noise std: {noise_std}\n"
        f"LR: {lr}, Batch size: {batch_size}, Epochs: {epochs}, Seed: {seed}\n\n"
        f"Final train loss: {train_losses[-1]:.6f}\n"
        f"Final val loss: {val_losses[-1]:.6f}\n"
    )

    return fig, w_table, summary, df_train_preview, out_path


def build_ui():
    """Construct and return the Gradio Blocks app."""
    available_devices = ["auto", "cpu"]
    if torch.cuda.is_available():
        available_devices.append("cuda")

    with gr.Blocks(title="Raw PyTorch Training Loop (Gradio)") as demo:
        gr.Markdown(
            """
# Raw PyTorch Training Loop (Linear Regression)

This Space generates a fresh synthetic regression dataset each run and trains a
`nn.Linear(10, 1)` model using a **manual** PyTorch training loop.
"""
        )

        # Holds the train-preview DataFrame so the "Data Preview" tab can read
        # it. NOTE: the original wired a second run_btn.click that fed output
        # components (Plot/File) back in as inputs and used throwaway inline
        # gr.State() objects, so the preview never updated — fixed by declaring
        # the state once and routing run_experiment's 4th output into it.
        train_preview_state = gr.State()

        with gr.Tabs():
            with gr.Tab("Train & Results"):
                with gr.Row():
                    with gr.Column(scale=1):
                        n_samples = gr.Slider(
                            minimum=200,
                            maximum=20000,
                            value=2000,
                            step=100,
                            label="n_samples",
                        )
                        noise_std = gr.Slider(
                            minimum=0.0,
                            maximum=5.0,
                            value=1.0,
                            step=0.05,
                            label="noise_std",
                        )
                        lr = gr.Number(value=0.01, label="lr (SGD learning rate)", precision=6)
                        batch_size = gr.Slider(
                            minimum=8,
                            maximum=1024,
                            value=64,
                            step=8,
                            label="batch_size",
                        )
                        epochs = gr.Slider(
                            minimum=1,
                            maximum=200,
                            value=20,
                            step=1,
                            label="epochs",
                        )
                        seed = gr.Number(value=42, label="seed", precision=0)
                        device_choice = gr.Dropdown(
                            choices=available_devices,
                            value="auto",
                            label="device (cpu/cuda if available)",
                        )
                        run_btn = gr.Button("Run training")

                    with gr.Column(scale=2):
                        loss_plot = gr.Plot(label="Loss curve (train vs val)")
                        w_compare = gr.Dataframe(
                            label="w_true vs w_learned (and bias)",
                            interactive=False,
                            wrap=True,
                        )
                        summary = gr.Textbox(
                            label="Summary",
                            lines=10,
                            interactive=False,
                        )
                        dataset_file = gr.File(
                            label="Download full dataset CSV (train+val): columns x0..x9, y, split",
                            interactive=False,
                        )

                # Single click handler: the 4th return value (preview df) goes
                # straight into the shared state component.
                run_btn.click(
                    fn=run_experiment,
                    inputs=[n_samples, noise_std, lr, batch_size, epochs, seed, device_choice],
                    outputs=[loss_plot, w_compare, summary, train_preview_state, dataset_file],
                )

            with gr.Tab("Data Preview"):
                gr.Markdown("### First 20 rows from the **training split**")
                preview_df = gr.Dataframe(
                    label="Training rows (first 20)",
                    interactive=False,
                    wrap=True,
                )

                def _show_preview(df):
                    # Before the first run the state is None — show an empty
                    # table with the expected columns instead of erroring.
                    if df is None:
                        return pd.DataFrame(columns=[f"x{i}" for i in range(10)] + ["y", "split"])
                    return df

                # Populate on page load and whenever a training run updates
                # the state.
                demo.load(fn=_show_preview, inputs=[train_preview_state], outputs=[preview_df])
                train_preview_state.change(
                    fn=_show_preview, inputs=[train_preview_state], outputs=[preview_df]
                )

                # Also allow a manual refresh button (handy on Spaces)
                refresh = gr.Button("Refresh preview")
                refresh.click(fn=_show_preview, inputs=[train_preview_state], outputs=[preview_df])

        gr.Markdown(
            """
**Notes**
- Dataset is regenerated each run (based on `seed`).
- Train/val split is 80/20 and uses `DataLoader`.
- Model: `nn.Linear(10,1)`, Loss: `nn.MSELoss()`, Optimizer: `torch.optim.SGD(lr=...)`.
"""
        )

    return demo


if __name__ == "__main__":
    demo = build_ui()
    demo.queue()
    demo.launch()