import io
import random
import tempfile
from dataclasses import dataclass

import gradio as gr
import matplotlib
matplotlib.use("Agg")  # headless-friendly for Hugging Face Spaces
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset


@dataclass
class DataSpec:
    """Settings that describe the synthetic linear-regression dataset to generate."""

    # Total number of rows generated (train and validation together).
    n_samples: int = 1024
    # Number of input feature columns in X.
    n_features: int = 10
    # Multiplier applied to the standard-normal noise added to y.
    noise_std: float = 0.3
    # Fraction of rows (taken from the front) assigned to the training split.
    train_frac: float = 0.8


def set_seed(seed: int) -> None:
    """Seed Python's `random`, NumPy and PyTorch (CPU and all CUDA devices) with `seed`."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    # Same order as the individual calls: stdlib, NumPy, torch CPU, torch CUDA.
    for apply_seed in seeders:
        apply_seed(seed)


def make_synthetic_regression(spec: DataSpec, seed: int = 42):
    """
    Generate a seeded linear-regression dataset and split it into train/val.

    The targets follow
      y = X @ w_true + b_true + noise
    with
      X: (n_samples, n_features)
      y: (n_samples, 1)

    Returns the tuple
      (X_train, y_train, X_val, y_val, w_true, b_true)
    where the first int(n_samples * train_frac) rows form the training split
    and the remaining rows form the validation split.
    """
    set_seed(seed)

    n_rows = spec.n_samples
    n_cols = spec.n_features

    # Ground-truth parameters students can compare the learned ones against.
    # The draw order (weights, bias, features, noise) is kept fixed so a given
    # seed always produces the same dataset.
    w_true = 2.0 * torch.randn(n_cols, 1)
    b_true = 0.5 * torch.randn(1)

    X = torch.randn(n_rows, n_cols)
    noise = spec.noise_std * torch.randn(n_rows, 1)
    y = X @ w_true + b_true + noise

    # Contiguous split: leading rows train, trailing rows validate.
    cut = int(n_rows * spec.train_frac)
    X_train, X_val = X[:cut], X[cut:]
    y_train, y_val = y[:cut], y[cut:]

    return (X_train, y_train, X_val, y_val, w_true, b_true)


def fig_to_image(fig) -> np.ndarray:
    """Render a matplotlib figure to PNG in memory and return it as a numpy image.

    The figure is always closed, even if rendering fails, so repeated calls in
    a long-running server do not accumulate open figures.

    Args:
        fig: The matplotlib figure to render.

    Returns:
        The image array decoded by `plt.imread` from the PNG bytes
        (NOTE(review): for PNG input matplotlib yields floats in [0, 1],
        typically with an alpha channel — confirm if a strict RGB uint8
        array is required downstream).
    """
    buf = io.BytesIO()
    try:
        fig.savefig(buf, format="png", bbox_inches="tight", dpi=160)
    finally:
        # Release the figure whether or not savefig succeeded.
        plt.close(fig)
    buf.seek(0)
    return plt.imread(buf, format="png")


def build_full_dataset_df(X_train, y_train, X_val, y_val) -> pd.DataFrame:
    """Combine the train and validation tensors into one labelled DataFrame.

    Feature columns are named ``x0 .. x{k-1}`` where ``k`` is the number of
    columns in ``X_train`` (previously hard-coded to 10), followed by the
    target column ``y`` and a ``split`` column holding ``"train"`` or ``"val"``.
    Training rows come first, then validation rows, with a fresh 0..n-1 index.

    Args:
        X_train: 2-D feature tensor for the training split.
        y_train: Target tensor for the training split (flattened to 1-D).
        X_val: 2-D feature tensor for the validation split.
        y_val: Target tensor for the validation split (flattened to 1-D).

    Returns:
        A single DataFrame with columns ``x0..``, ``y`` and ``split``.
    """
    feature_cols = [f"x{i}" for i in range(X_train.shape[1])]

    def _split_frame(features, targets, split_name: str) -> pd.DataFrame:
        # detach() makes this safe for tensors that still track gradients.
        frame = pd.DataFrame(features.detach().cpu().numpy(), columns=feature_cols)
        frame["y"] = targets.detach().cpu().numpy().reshape(-1)
        frame["split"] = split_name
        return frame

    return pd.concat(
        [
            _split_frame(X_train, y_train, "train"),
            _split_frame(X_val, y_val, "val"),
        ],
        axis=0,
        ignore_index=True,
    )


def save_df_to_temp_csv(df: pd.DataFrame) -> str:
    """Write `df` (without its index) to a new temp CSV and return the file path.

    The temporary file is created with ``delete=False`` so it outlives this
    call and can be served as a Gradio download; the caller owns its cleanup.

    Args:
        df: The DataFrame to export.

    Returns:
        Path of the CSV file that was written.
    """
    # Only reserve a unique name here and close the handle right away: leaving
    # it open leaks a file descriptor, and on Windows an open NamedTemporaryFile
    # cannot be reopened by name for writing.
    with tempfile.NamedTemporaryFile(
        delete=False,
        suffix=".csv",
        prefix="synthetic_linear_regression_",
    ) as tmp:
        csv_path = tmp.name

    df.to_csv(csv_path, index=False)
    return csv_path


# Reference loop shown verbatim to students in the "Raw Loop Snippet" tab.
_RAW_LOOP_SNIPPET = """# Raw PyTorch: requires manual training loop
import torch
import torch.nn as nn

model = nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
loss_fn = nn.MSELoss()

for x, y in dataloader:
    optimizer.zero_grad()
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    loss.backward()
    optimizer.step()
"""


def _train_one_epoch(model, loader, loss_fn, optimizer, device) -> float:
    """Run one optimisation pass over `loader`; return the sample-weighted mean loss."""
    model.train()
    loss_sum = 0.0
    n_seen = 0

    for x, y in loader:
        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad()      # (1) reset grads
        y_pred = model(x)          # (2) forward
        loss = loss_fn(y_pred, y)  # (3) compute loss
        loss.backward()            # (4) backprop
        optimizer.step()           # (5) update weights

        # Weight by batch size so a smaller final batch does not skew the mean.
        n_batch = x.size(0)
        loss_sum += loss.item() * n_batch
        n_seen += n_batch

    return loss_sum / max(n_seen, 1)


def _evaluate(model, loader, loss_fn, device) -> float:
    """Compute the sample-weighted mean loss over `loader` without tracking gradients."""
    model.eval()
    loss_sum = 0.0
    n_seen = 0

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device)
            loss = loss_fn(model(x), y)

            n_batch = x.size(0)
            loss_sum += loss.item() * n_batch
            n_seen += n_batch

    return loss_sum / max(n_seen, 1)


def _plot_loss_curves(train_losses, val_losses) -> np.ndarray:
    """Plot per-epoch train/val losses and return the rendered image array."""
    epoch_axis = range(1, len(train_losses) + 1)

    # Draw through explicit Figure/Axes objects rather than pyplot's implicit
    # "current figure", so concurrent requests cannot draw onto each other's plot.
    fig, ax = plt.subplots()
    ax.plot(epoch_axis, train_losses, marker="o", label="train")
    ax.plot(epoch_axis, val_losses, marker="o", label="val")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("MSE Loss")
    ax.set_title("Raw PyTorch Training Loop (Linear Regression)")
    ax.grid(True, alpha=0.3)
    ax.legend()
    return fig_to_image(fig)


def _weights_comparison_df(w_true, w_learned) -> pd.DataFrame:
    """Tabulate true vs learned weights per feature, largest absolute error first."""
    true_vals = w_true.detach().reshape(-1).tolist()
    learned_vals = w_learned.detach().reshape(-1).tolist()

    rows = [
        {
            "feature": f"x{i}",
            "w_true": round(t, 4),
            "w_learned": round(w, 4),
            "abs_error": round(abs(t - w), 4),
        }
        for i, (t, w) in enumerate(zip(true_vals, learned_vals))
    ]
    table = pd.DataFrame(rows)
    return table.sort_values("abs_error", ascending=False).reset_index(drop=True)


def train_raw_pytorch(
    n_samples: int,
    noise_std: float,
    lr: float,
    batch_size: int,
    epochs: int,
    seed: int,
    device_choice: str,
):
    """Generate synthetic data, fit a linear model with a raw PyTorch loop, and report.

    Args:
        n_samples: Total number of rows to generate (80% train / 20% val).
        noise_std: Scale of the Gaussian noise added to the targets.
        lr: Learning rate for plain SGD.
        batch_size: Mini-batch size for both data loaders.
        epochs: Number of passes over the training data (must be >= 1).
        seed: Seed used for data generation; it is applied before the model is
            created, so initialisation and shuffling are reproducible too.
        device_choice: "cuda" to train on GPU when one is available; any other
            value (or no GPU) falls back to CPU.

    Returns:
        A 6-tuple in the order expected by the Gradio outputs:
        (loss-curve image, weights comparison DataFrame, summary text,
        reference loop snippet, preview DataFrame of the first training rows,
        path of the full dataset CSV).

    Raises:
        ValueError: If `epochs` is less than 1.
    """
    # UI widgets may deliver whole numbers as floats; range() and tensor
    # shapes require real ints.
    n_samples = int(n_samples)
    batch_size = int(batch_size)
    epochs = int(epochs)
    seed = int(seed)
    if epochs < 1:
        raise ValueError("epochs must be at least 1")

    # ----------------------------
    # 1) Data
    # ----------------------------
    spec = DataSpec(n_samples=n_samples, n_features=10, noise_std=noise_std, train_frac=0.8)
    n_features = spec.n_features  # single source of truth for the model width
    X_train, y_train, X_val, y_val, w_true, b_true = make_synthetic_regression(spec, seed=seed)

    # Full dataset CSV (train + val with split column)
    full_df = build_full_dataset_df(X_train, y_train, X_val, y_val).round(4)
    csv_path = save_df_to_temp_csv(full_df)

    # Data preview: first (up to) 20 training rows, without the split label.
    df_preview = (
        full_df[full_df["split"] == "train"]
        .head(20)
        .drop(columns=["split"])
        .reset_index(drop=True)
    )

    train_loader = DataLoader(
        TensorDataset(X_train, y_train),
        batch_size=batch_size,
        shuffle=True,
        drop_last=False,
    )
    val_loader = DataLoader(
        TensorDataset(X_val, y_val),
        batch_size=batch_size,
        shuffle=False,
        drop_last=False,
    )

    # ----------------------------
    # 2) Model, optimizer, loss
    # ----------------------------
    # Device handling (CPU by default; CUDA if available & selected)
    use_cuda = device_choice == "cuda" and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Initialise on CPU (keeps the seeded init identical across devices), then
    # move to the target device *before* building the optimizer.
    model = nn.Linear(n_features, 1)
    model.to(device)
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    # ----------------------------
    # 3) Raw PyTorch training loop
    # ----------------------------
    train_losses = []
    val_losses = []
    for _ in range(epochs):
        train_losses.append(_train_one_epoch(model, train_loader, loss_fn, optimizer, device))
        val_losses.append(_evaluate(model, val_loader, loss_fn, device))

    # ----------------------------
    # 4) Results for students
    # ----------------------------
    loss_plot = _plot_loss_curves(train_losses, val_losses)

    # Learned parameters vs. true parameters
    df_weights = _weights_comparison_df(w_true, model.weight)
    b_learned = model.bias.detach()

    summary = (
        f"Device: {device}\n"
        f"Final train loss: {train_losses[-1]:.6f}\n"
        f"Final val loss:   {val_losses[-1]:.6f}\n\n"
        f"True bias (b_true):        {b_true.item():.4f}\n"
        f"Learned bias (b_learned):  {b_learned.item():.4f}\n\n"
        f"Dataset CSV includes columns: x0..x9, y, split(train/val)\n"
    )

    # csv_path is returned as the downloadable artifact.
    return loss_plot, df_weights, summary, _RAW_LOOP_SNIPPET, df_preview, csv_path


# Gradio UI: input controls on top, results split across three tabs, and a
# single button that runs `train_raw_pytorch` and fills every output.
with gr.Blocks(title="Raw PyTorch Training Loop Demo") as demo:
    # The formula uses $$...$$ because those are the delimiters Gradio's
    # Markdown renders as LaTeX by default; the previous "\[" / "\]" were also
    # invalid Python escape sequences in a non-raw string.
    gr.Markdown(
        """
# Raw PyTorch Training Loop (Linear Regression)

This Space generates **synthetic data** each run:

$$y = Xw + b + \\text{noise}$$

Go to **Data Preview** to see sample rows and **download the full dataset** as CSV.
"""
    )

    # --- Inputs (order must match the `inputs=` list below) ---
    with gr.Row():
        n_samples = gr.Slider(256, 8192, value=1024, step=256, label="Number of samples")
        noise_std = gr.Slider(0.0, 2.0, value=0.3, step=0.05, label="Noise (std dev)")

    with gr.Row():
        lr = gr.Slider(1e-4, 1.0, value=0.01, step=1e-4, label="Learning rate (SGD)")
        batch_size = gr.Dropdown([16, 32, 64, 128, 256], value=64, label="Batch size")

    with gr.Row():
        epochs = gr.Slider(1, 50, value=10, step=1, label="Epochs")
        seed = gr.Number(value=42, precision=0, label="Random seed")

    device_choice = gr.Radio(["cpu", "cuda"], value="cpu", label="Device (cuda only if available)")

    run_btn = gr.Button("Train Model", variant="primary")

    # --- Outputs (order must match the tuple returned by train_raw_pytorch) ---
    with gr.Tab("Outputs"):
        loss_img = gr.Image(label="Loss Curve", type="numpy")
        weights_df = gr.Dataframe(label="Weights: True vs Learned (sorted by abs error)", wrap=True)
        summary_txt = gr.Textbox(label="Summary", lines=10)

    with gr.Tab("Data Preview"):
        data_preview = gr.Dataframe(label="First 20 rows of generated TRAIN data (X features + y)", wrap=True)
        download_file = gr.File(label="Download full dataset CSV (train + val)")

    with gr.Tab("Raw Loop Snippet"):
        snippet = gr.Code(label="Your original loop (as runnable reference)", language="python")

    run_btn.click(
        fn=train_raw_pytorch,
        inputs=[n_samples, noise_std, lr, batch_size, epochs, seed, device_choice],
        outputs=[loss_img, weights_df, summary_txt, snippet, data_preview, download_file],
    )

# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()