Spaces:

amar6de2
/

VisionBite

Sleeping

App Files Files Community

amar6de2 committed on Jul 1, 2025

Commit

01a5409

verified ·

1 Parent(s): 7a7cf02

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -97

app.py CHANGED Viewed

@@ -1,114 +1,65 @@
-### 1. Imports and class names setup ###
-import gradio as gr
-import os
 import torch
-import numpy as np
-from PIL import Image
-from model import create_vit_model  # Make sure this function exists in model.py
-from timeit import default_timer as timer
-from typing import Tuple, Dict
-# Setup class names (or hardcode them if needed)
-class_names = ["apple_pie", "baby_back_ribs", "baklava", "beef_carpaccio", "beef_tartare", "beet_salad",
-               "beignets", "bibimbap", "biryani", "bread_pudding", "breakfast_burrito", "bruschetta",
-               "caesar_salad", "cannoli", "caprese_salad", "carrot_cake", "ceviche", "chai", "chapati",
-               "cheese_plate", "cheesecake", "chicken_curry", "chicken_quesadilla", "chicken_wings",
-               "chocolate_cake", "chocolate_mousse", "chole_bhature", "churros", "clam_chowder",
-               "club_sandwich", "crab_cakes", "creme_brulee", "croque_madame", "cup_cakes", "dabeli",
-               "dal", "deviled_eggs", "dhokla", "donuts", "dosa", "dumplings", "edamame", "eggs_benedict",
-               "escargots", "falafel", "filet_mignon", "fish_and_chips", "foie_gras", "french_fries",
-               "french_onion_soup", "french_toast", "fried_calamari", "fried_rice", "frozen_yogurt",
-               "garlic_bread", "gnocchi", "greek_salad", "grilled_cheese_sandwich", "grilled_salmon",
-               "guacamole", "gyoza", "hamburger", "hot_and_sour_soup", "hot_dog", "huevos_rancheros",
-               "hummus", "ice_cream", "idli", "jalebi", "kathi_rolls", "kofta", "kulfi", "lasagna",
-               "lobster_bisque", "lobster_roll_sandwich", "macaroni_and_cheese", "macarons", "miso_soup",
-               "momos", "mussels", "naan", "nachos", "omelette", "onion_rings", "oysters", "pad_thai",
-               "paella", "pakoda", "pancakes", "pani_puri", "panna_cotta", "panner_butter_masala",
-               "pav_bhaji", "peking_duck", "pho", "pizza", "pork_chop", "poutine", "prime_rib",
-               "pulled_pork_sandwich", "ramen", "ravioli", "red_velvet_cake", "risotto", "samosa",
-               "sashimi", "scallops", "seaweed_salad", "shrimp_and_grits", "spaghetti_bolognese",
-               "spaghetti_carbonara", "spring_rolls", "steak", "strawberry_shortcake", "sushi",
-               "tacos", "takoyaki", "tiramisu", "tuna_tartare", "vadapav", "waffles"]
-### 2. Model and transforms setup ###
-# Create the model and transforms
-vit, vit_transforms = create_vit_model(num_classes=len(class_names))
-# Load saved model weights (assumes model is trained and .pth file is in the correct path)
-vit.load_state_dict(torch.load("vit_epoch_2.pth", map_location=torch.device("cpu")))
-### 3. Prediction function ###
-def predict(img) -> Tuple[Dict[str, float], float]:
-    """Transforms and performs a prediction on img and returns prediction and time taken."""
-    from PIL import UnidentifiedImageError
-    try:
-        # Convert ndarray to PIL if needed
-        if isinstance(img, np.ndarray):
-            img = Image.fromarray(img)
-        # Catch bad image input
-        if img.mode != "RGB":
-            img = img.convert("RGB")
-        # Start timer
-        start_time = timer()
-        # Transform and add batch dimension
-        img_tensor = vit_transforms(img).unsqueeze(0)
-        # Inference
-        vit.eval()
-        with torch.inference_mode():
-            pred_probs = torch.softmax(vit(img_tensor), dim=1)
-        pred_labels_and_probs = {
-            class_names[i]: float(pred_probs[0][i])
-            for i in range(len(class_names))
-        }
-        pred_time = round(timer() - start_time, 5)
-        return pred_labels_and_probs, pred_time
-    except (UnidentifiedImageError, TypeError, ValueError) as e:
-        return {"Error": f"Invalid image input: {str(e)}"}, 0.0
-### 4. Gradio app setup ###
-# Title, description, and article text
-title = "VisionBite 🍕🥩🍣"
-description = (
-    "A Vision Transformer (ViT-Base-16) model trained to classify images of food "
-    "into 121 distinct categories. The model uses a transformer-based architecture "
-    "to extract visual features and achieve accurate classification across diverse food items."
-)
-article = (
-    "Model trained on the [Food121 dataset](https://huggingface.co/datasets/ItsNotRohit/Food121) "
-    "with 95% top-5 prediction accuracy."
-)
-# Setup example images (if available)
-if os.path.exists("examples"):
-    example_list = [["examples/" + f] for f in os.listdir("examples") if f.endswith((".jpg", ".jpeg", ".png"))]
-else:
-    example_list = []
-# Create Gradio interface
 demo = gr.Interface(
     fn=predict,
-    inputs=gr.Image(type="pil"),
-    outputs=[
-        gr.Label(num_top_classes=5, label="Top Predictions"),
-        gr.Number(label="Prediction time (s)")
-    ],
-    examples=example_list,
-    title=title,
-    description=description,
-    article=article
 )
-# Launch app
-demo.launch()

 import torch
+import torch.nn.functional as F
+import torchvision.transforms as transforms
+from torchvision.models import vit_b_16
+from torchvision.transforms import v2
+from PIL import Image
+import gradio as gr
+import os
+# Load the pretrained ViT-B/16 together with the metadata published with its
+# weights, so predictions can be shown with real category names.
+from torchvision.models import ViT_B_16_Weights
+_weights = ViT_B_16_Weights.DEFAULT  # same weights the 'DEFAULT' string selects
+model = vit_b_16(weights=_weights)
+model.eval()  # inference only
+# Preprocessing for ViT: fixed 224x224 input, float tensor scaled to [0, 1],
+# then per-channel normalisation with the three-channel statistics below.
+vit_transforms = v2.Compose([
+    v2.Resize((224, 224)),
+    v2.ToImage(),  # Ensure proper image type
+    v2.ToDtype(torch.float32, scale=True),
+    v2.Normalize(mean=[0.485, 0.456, 0.406],
+                 std=[0.229, 0.224, 0.225]),
+])
+# Class labels, index-aligned with the model's output logits. These replace the
+# former "Class 0" ... "Class 999" placeholders.
+# NOTE(review): category names are not guaranteed to be unique, and predict()
+# keys its result dict by name - confirm a collision in the top 5 is acceptable.
+class_labels = list(_weights.meta["categories"])
+def predict(img):
+    """Classify an image and return the five most probable labels.
+
+    Args:
+        img: A ``PIL.Image.Image`` or a ``numpy.ndarray`` holding image data.
+
+    Returns:
+        A dict mapping the top-5 entries of ``class_labels`` to their softmax
+        probabilities as floats.
+
+    Raises:
+        ValueError: If ``img`` is a ``torch.Tensor`` or any other object that
+            is neither a PIL image nor a NumPy array (including ``None``).
+    """
+    # Imported locally because the imports shown for this revision do not
+    # include NumPy; without it the ndarray check below raised NameError for
+    # every PIL input.
+    import numpy as np
+
+    if isinstance(img, torch.Tensor):
+        raise ValueError("Expected PIL.Image, got torch.Tensor.")
+    if isinstance(img, np.ndarray):
+        img = Image.fromarray(img)
+    elif not isinstance(img, Image.Image):
+        raise ValueError("Input is not a valid PIL image")
+    # vit_transforms normalises with three channel statistics, so grayscale,
+    # palette and RGBA inputs must be converted to RGB first.
+    if img.mode != "RGB":
+        img = img.convert("RGB")
+
+    # Transform, add the batch dimension, and run the model without autograd.
+    img_tensor = vit_transforms(img).unsqueeze(0)
+    with torch.no_grad():
+        outputs = model(img_tensor)
+        probs = F.softmax(outputs[0], dim=0)
+
+    # Plain Python ints/floats keep the list indexing and the returned dict
+    # free of tensor objects.
+    top5 = torch.topk(probs, 5)
+    return {
+        class_labels[idx]: prob
+        for idx, prob in zip(top5.indices.tolist(), top5.values.tolist())
+    }
+# Set up Gradio interface
+# The input component is configured with type="pil"; predict() also accepts
+# NumPy arrays and rejects anything else.
+image_input = gr.Image(type="pil", label="Upload JPEG Image")
+# Show five classes, matching the five entries predict() returns.
+label_output = gr.Label(num_top_classes=5)
+# Candidate example images; only those present on disk are offered, so the
+# list may end up empty.
+example_images = ["images/sample1.jpg", "images/sample2.jpg"]
+example_images = [img for img in example_images if os.path.exists(img)]  # filter missing files
 demo = gr.Interface(
     fn=predict,
+    inputs=image_input,
+    outputs=label_output,
+    examples=example_images,
+    title="ViT Image Classifier",
+    description="Upload a JPEG image to classify it using Vision Transformer (ViT-B16)."
 )
+# Launch only when executed as a script, not when imported.
+# NOTE(review): 0.0.0.0:7860 presumably targets the Spaces container's exposed
+# port, and show_error=True is expected to surface exceptions from predict()
+# in the UI - confirm against the Gradio version in use.
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)