Spaces:

eaglelandsonce
/

NMIST_PyTorch

Sleeping

App Files Files Community

eaglelandsonce committed on Mar 2

Commit

75aca80

verified ·

1 Parent(s): f839b6f

Create app.py

Browse files

Files changed (1) hide show

app.py +327 -0

app.py ADDED Viewed

	@@ -0,0 +1,327 @@

+import os
+import time
+import json
+import threading
+import numpy as np
+from PIL import Image, ImageOps
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.utils.data import DataLoader, Subset
+from torchvision import datasets, transforms
+import gradio as gr
+# -----------------------------
+# Custom PyTorch model (nn.Module)
+# -----------------------------
+class MnistCNN(nn.Module):
+    def __init__(self, num_classes: int = 10, dropout: float = 0.25):
+        super().__init__()
+        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)    # 28x28 -> 28x28
+        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)   # 28x28 -> 28x28
+        self.pool = nn.MaxPool2d(2, 2)                             # 28x28 -> 14x14
+        self.dropout = nn.Dropout(dropout)
+        self.fc1 = nn.Linear(64 * 14 * 14, 128)
+        self.fc2 = nn.Linear(128, num_classes)
+    def forward(self, x):
+        x = F.relu(self.conv1(x))
+        x = self.pool(F.relu(self.conv2(x)))
+        x = torch.flatten(x, 1)
+        x = self.dropout(F.relu(self.fc1(x)))
+        return self.fc2(x)  # logits
+# -----------------------------
+# Global state
+# -----------------------------
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+MODEL_LOCK = threading.Lock()
+MODEL = MnistCNN().to(DEVICE)
+WEIGHTS_PATH = "mnist_cnn.pth"
+CONFIG_PATH = "mnist_config.json"
+DEFAULT_CONFIG = {
+    "num_classes": 10,
+    "dropout": 0.25,
+    "normalize_mean": 0.1307,
+    "normalize_std": 0.3081,
+    "image_size": 28
+}
+# Use deterministic-ish behavior for demos (not perfect determinism on all systems)
+torch.manual_seed(42)
+np.random.seed(42)
+def save_config():
+    with open(CONFIG_PATH, "w") as f:
+        json.dump(DEFAULT_CONFIG, f, indent=2)
+def load_config():
+    if os.path.exists(CONFIG_PATH):
+        with open(CONFIG_PATH, "r") as f:
+            return json.load(f)
+    save_config()
+    return DEFAULT_CONFIG
+CFG = load_config()
+# -----------------------------
+# Utilities
+# -----------------------------
+def maybe_load_weights():
+    global MODEL
+    if os.path.exists(WEIGHTS_PATH):
+        state = torch.load(WEIGHTS_PATH, map_location=DEVICE)
+        with MODEL_LOCK:
+            MODEL.load_state_dict(state)
+            MODEL.eval()
+        return True
+    return False
+def preprocess_pil(img: Image.Image) -> torch.Tensor:
+    """
+    Converts a PIL image to MNIST-like tensor: (1,1,28,28), normalized.
+    Also attempts to handle "black ink on white background" by auto-inverting.
+    """
+    if img is None:
+        raise ValueError("No image provided.")
+    # Convert to grayscale
+    img = img.convert("L")
+    # Resize to 28x28
+    img = img.resize((CFG["image_size"], CFG["image_size"]))
+    # Convert to numpy [0..1]
+    arr = np.array(img).astype(np.float32) / 255.0
+    # Auto-invert if background looks white-ish (common with sketch tools)
+    # MNIST digits are typically bright strokes on darker background.
+    if arr.mean() > 0.5:
+        arr = 1.0 - arr
+    # Normalize like training
+    arr = (arr - CFG["normalize_mean"]) / CFG["normalize_std"]
+    # Shape to (1,1,28,28)
+    x = torch.from_numpy(arr).unsqueeze(0).unsqueeze(0)
+    return x.to(DEVICE)
+def predict_digit(img: Image.Image):
+    global MODEL
+    if img is None:
+        return "No image", {}
+    x = preprocess_pil(img)
+    with MODEL_LOCK:
+        MODEL.eval()
+        with torch.no_grad():
+            logits = MODEL(x)
+            probs = torch.softmax(logits, dim=1).cpu().numpy().squeeze(0)
+    pred = int(np.argmax(probs))
+    prob_dict = {str(i): float(probs[i]) for i in range(10)}
+    return pred, prob_dict
+# -----------------------------
+# Training
+# -----------------------------
+def get_dataloaders(batch_size: int, max_train_samples: int, max_test_samples: int):
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize((CFG["normalize_mean"],), (CFG["normalize_std"],))
+    ])
+    train_ds = datasets.MNIST(root="data", train=True, download=True, transform=transform)
+    test_ds  = datasets.MNIST(root="data", train=False, download=True, transform=transform)
+    # Subset for faster training on Spaces (optional)
+    if max_train_samples and max_train_samples < len(train_ds):
+        train_ds = Subset(train_ds, range(max_train_samples))
+    if max_test_samples and max_test_samples < len(test_ds):
+        test_ds = Subset(test_ds, range(max_test_samples))
+    # num_workers=0 is safest in Spaces
+    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=0)
+    test_dl  = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=0)
+    return train_dl, test_dl
+def evaluate(model: nn.Module, test_dl: DataLoader):
+    model.eval()
+    correct = 0
+    total = 0
+    loss_sum = 0.0
+    criterion = nn.CrossEntropyLoss()
+    with torch.no_grad():
+        for x, y in test_dl:
+            x, y = x.to(DEVICE), y.to(DEVICE)
+            logits = model(x)
+            loss = criterion(logits, y)
+            loss_sum += loss.item()
+            preds = logits.argmax(dim=1)
+            correct += (preds == y).sum().item()
+            total += y.numel()
+    avg_loss = loss_sum / max(1, len(test_dl))
+    acc = correct / max(1, total)
+    return avg_loss, acc
+def train_mnist(epochs: int, lr: float, batch_size: int, max_train_samples: int, max_test_samples: int, progress=gr.Progress()):
+    global MODEL
+    train_dl, test_dl = get_dataloaders(batch_size, max_train_samples, max_test_samples)
+    # Re-init model each time you train (simple + predictable)
+    model = MnistCNN(num_classes=CFG["num_classes"], dropout=CFG["dropout"]).to(DEVICE)
+    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
+    criterion = nn.CrossEntropyLoss()
+    logs = []
+    start = time.time()
+    for epoch in range(1, epochs + 1):
+        model.train()
+        running_loss = 0.0
+        correct = 0
+        total = 0
+        for step, (x, y) in enumerate(progress.tqdm(train_dl, desc=f"Epoch {epoch}/{epochs}")):
+            x, y = x.to(DEVICE), y.to(DEVICE)
+            optimizer.zero_grad()
+            logits = model(x)
+            loss = criterion(logits, y)
+            loss.backward()
+            optimizer.step()
+            running_loss += loss.item()
+            preds = logits.argmax(dim=1)
+            correct += (preds == y).sum().item()
+            total += y.numel()
+        train_loss = running_loss / max(1, len(train_dl))
+        train_acc = correct / max(1, total)
+        test_loss, test_acc = evaluate(model, test_dl)
+        logs.append(
+            f"Epoch {epoch}/{epochs} | "
+            f"train loss {train_loss:.4f} acc {train_acc:.4f} | "
+            f"test loss {test_loss:.4f} acc {test_acc:.4f}"
+        )
+    # Save weights locally
+    torch.save(model.state_dict(), WEIGHTS_PATH)
+    save_config()
+    # Swap global model
+    with MODEL_LOCK:
+        MODEL.load_state_dict(model.state_dict())
+        MODEL.eval()
+    elapsed = time.time() - start
+    header = f"Done. Saved weights to `{WEIGHTS_PATH}`. Device: {DEVICE}. Time: {elapsed:.1f}s\n"
+    return header + "\n".join(logs)
+def load_saved_weights_ui():
+    ok = maybe_load_weights()
+    if ok:
+        return f"Loaded saved weights from `{WEIGHTS_PATH}`."
+    return f"No saved weights found at `{WEIGHTS_PATH}`. Train first."
+# Try to load weights at startup (if present); the boolean result is not needed here.
+_ = maybe_load_weights()
+# -----------------------------
+# Gradio UI
+# -----------------------------
+# Layout: a single row holding two columns. The first column has the training
+# controls and log; the second has the draw/upload inputs and the prediction
+# outputs. Event wiring comes after the layout.
+with gr.Blocks() as demo:
+    gr.Markdown("# MNIST (Custom `nn.Module`) — Train + Predict (PyTorch + Gradio)")
+    gr.Markdown(
+        "Use **Train** to fit a small CNN on MNIST. Then **draw** or **upload** a digit to predict.\n\n"
+        f"- Running on: `{DEVICE}`\n"
+        f"- Weights file: `{WEIGHTS_PATH}`"
+    )
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("## 1) Train (optional)")
+            # Hyperparameters passed to train_mnist, in its argument order.
+            epochs = gr.Slider(1, 5, value=1, step=1, label="Epochs (start with 1)")
+            lr = gr.Number(value=1e-3, label="Learning rate", precision=6)
+            batch = gr.Slider(32, 256, value=128, step=32, label="Batch size")
+            gr.Markdown("### Speed controls (use smaller values for faster training)")
+            # Caps on how many MNIST samples are used for training/evaluation.
+            max_train = gr.Slider(1000, 60000, value=10000, step=1000, label="Max train samples")
+            max_test = gr.Slider(500, 10000, value=2000, step=500, label="Max test samples")
+            train_btn = gr.Button("Train model")
+            load_btn = gr.Button("Load saved weights")
+            # train_log receives the per-epoch report; status receives short messages.
+            train_log = gr.Textbox(label="Training log", lines=10)
+            status = gr.Textbox(label="Status", lines=2)
+        with gr.Column():
+            gr.Markdown("## 2) Predict")
+            # Two alternative inputs, each with its own button; both feed predict_digit.
+            with gr.Tab("Draw"):
+                draw_img = gr.Image(source="canvas", tool="sketch", type="pil", label="Draw a digit (0-9)")
+                draw_btn = gr.Button("Predict from drawing")
+            with gr.Tab("Upload"):
+                up_img = gr.Image(source="upload", type="pil", label="Upload an image of a digit")
+                up_btn = gr.Button("Predict from upload")
+            # Shared outputs for both prediction buttons.
+            pred_out = gr.Number(label="Prediction")
+            prob_out = gr.Label(num_top_classes=3, label="Probabilities (top 3)")
+    # Wiring
+    # Train button: run train_mnist into the log, then set the status text in a
+    # chained step.
+    # NOTE(review): confirm whether the chained .then() step also fires when
+    # training raises; if so the status would claim completion after a failure.
+    train_btn.click(
+        fn=train_mnist,
+        inputs=[epochs, lr, batch, max_train, max_test],
+        outputs=[train_log],
+    ).then(
+        fn=lambda: "Training complete. You can now predict.",
+        inputs=[],
+        outputs=[status],
+    )
+    # Load button: reload weights from disk and report the outcome in the status box.
+    load_btn.click(
+        fn=load_saved_weights_ui,
+        inputs=[],
+        outputs=[status],
+    )
+    # Both predict buttons call the same function and write to the same outputs.
+    draw_btn.click(
+        fn=predict_digit,
+        inputs=[draw_img],
+        outputs=[pred_out, prob_out],
+    )
+    up_btn.click(
+        fn=predict_digit,
+        inputs=[up_img],
+        outputs=[pred_out, prob_out],
+    )
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)