Spaces:

Aumkeshchy2003
/

ViT_For_100_Class

Sleeping

Aumkeshchy2003 committed on Nov 30, 2025

Commit

1396142

verified ·

1 Parent(s): c14f0d5

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,8 +7,7 @@ import gradio as gr
 from PIL import Image
 import math
-# ------------------------
-# Configuration (must match your trained model)
 cfg = {
     "image_size": 32,
     "patch_size": 4,
@@ -41,10 +40,7 @@ classes = [
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# ------------------------
-# Model definition
 # ViT model implementation
-# --- Conv stem (replace PatchEmbed) ---
 class ConvPatchEmbed(nn.Module):
     def __init__(self, in_chans=3, embed_dim=384):
         super().__init__()
@@ -67,9 +63,9 @@ class ConvPatchEmbed(nn.Module):
     def forward(self, x):
         # x: (B, C, H, W)
-        x = self.conv(x)                  # (B, E, H/4, W/4) -> H/4=8 for 32x32
-        x = x.flatten(2)                  # (B, E, N)
-        x = x.transpose(1, 2)             # (B, N, E)
         return x
 class MLP(nn.Module):
@@ -192,13 +188,12 @@ class ViT(nn.Module):
         out = self.head(cls)
         return out
-# ------------------------
 # Load model weights
 model = ViT(cfg).to(device)
-model.load_state_dict(torch.load("best_vit_cifar100_small.pt", map_location=device))
 model.eval()
-# ------------------------
 # Image preprocessing
 transform = transforms.Compose([
     transforms.Resize((32,32)),
@@ -215,7 +210,6 @@ def predict(img: Image.Image):
     result = {classes[i]: float(probs[i]) for i in top5.indices}
     return result
-# ------------------------
 # Gradio interface
 iface = gr.Interface(
     fn=predict,

 from PIL import Image
 import math
+# Configuration
 cfg = {
     "image_size": 32,
     "patch_size": 4,
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # ViT model implementation
 class ConvPatchEmbed(nn.Module):
     def __init__(self, in_chans=3, embed_dim=384):
         super().__init__()
     def forward(self, x):
         # x: (B, C, H, W)
+        x = self.conv(x)
+        x = x.flatten(2)
+        x = x.transpose(1, 2)
         return x
 class MLP(nn.Module):
         out = self.head(cls)
         return out
 # Load model weights
 model = ViT(cfg).to(device)
+model.load_state_dict(torch.load("best_ViT_CIFAR100_baseline_checkpoint.pth", map_location=device))
 model.eval()
 # Image preprocessing
 transform = transforms.Compose([
     transforms.Resize((32,32)),
     result = {classes[i]: float(probs[i]) for i in top5.indices}
     return result
 # Gradio interface
 iface = gr.Interface(
     fn=predict,