Spaces:

amar6de2
/

VisionBite

Sleeping

App Files Files Community

amar6de2 committed on Jul 1, 2025

Commit

5f0b74a

1 Parent(s): b289894

done

Browse files

Files changed (1) hide show

app.py +96 -40

app.py CHANGED Viewed

@@ -1,49 +1,105 @@
 import gradio as gr
 import torch
 import numpy as np
 from PIL import Image
-from model import create_vit_model
-# Load model and transforms
-vit, vit_transforms = create_vit_model()
-vit.eval()
-# Define class names (replace with your actual class names)
-class_names = [f"class_{i}" for i in range(121)]  # Replace with real labels if available
-# Prediction function: returns the five most probable class names with their probabilities
-def predict(img):
-    try:
-        # Convert NumPy arrays to PIL; reject None and anything that is not a PIL image
-        if isinstance(img, np.ndarray):
-            img = Image.fromarray(img)
-        if img is None or not isinstance(img, Image.Image):
-            raise ValueError("Invalid image input or format.")
-        # Convert any non-RGB mode to RGB before applying the transforms
-        if img.mode != "RGB":
-            img = img.convert("RGB")
-        # Add a batch dimension, run the model without gradients, keep the top-5 softmax probabilities
-        img_tensor = vit_transforms(img).unsqueeze(0)
-        with torch.no_grad():
-            preds = vit(img_tensor)
-            probs = torch.softmax(preds, dim=1)[0]
-            top5 = torch.topk(probs, k=5)
-            results = {class_names[idx]: float(probs[idx]) for idx in top5.indices}
-            return results
-    except Exception as e:  # any failure (including the ValueError above) is re-raised as RuntimeError, chained to the cause
-        raise RuntimeError(f"Prediction failed: {str(e)}") from e
-# Gradio interface
 demo = gr.Interface(
     fn=predict,
     inputs=gr.Image(type="pil"),
-    outputs=gr.Label(num_top_classes=5),
-    title="ViT Image Classifier",
-    description="Upload an image to classify using Vision Transformer (ViT)."
 )
-if __name__ == "__main__":
-    demo.launch()

+### 1. Imports and class names setup ###
 import gradio as gr
+import os
 import torch
 import numpy as np
 from PIL import Image
+from model import create_vit_model  # Make sure this function exists in model.py
+from timeit import default_timer as timer
+from typing import Tuple, Dict
+# Class names for the 121 food categories; predict() maps model output index i to class_names[i], so the order matters
+class_names = ["apple_pie", "baby_back_ribs", "baklava", "beef_carpaccio", "beef_tartare", "beet_salad",
+               "beignets", "bibimbap", "biryani", "bread_pudding", "breakfast_burrito", "bruschetta",
+               "caesar_salad", "cannoli", "caprese_salad", "carrot_cake", "ceviche", "chai", "chapati",
+               "cheese_plate", "cheesecake", "chicken_curry", "chicken_quesadilla", "chicken_wings",
+               "chocolate_cake", "chocolate_mousse", "chole_bhature", "churros", "clam_chowder",
+               "club_sandwich", "crab_cakes", "creme_brulee", "croque_madame", "cup_cakes", "dabeli",
+               "dal", "deviled_eggs", "dhokla", "donuts", "dosa", "dumplings", "edamame", "eggs_benedict",
+               "escargots", "falafel", "filet_mignon", "fish_and_chips", "foie_gras", "french_fries",
+               "french_onion_soup", "french_toast", "fried_calamari", "fried_rice", "frozen_yogurt",
+               "garlic_bread", "gnocchi", "greek_salad", "grilled_cheese_sandwich", "grilled_salmon",
+               "guacamole", "gyoza", "hamburger", "hot_and_sour_soup", "hot_dog", "huevos_rancheros",
+               "hummus", "ice_cream", "idli", "jalebi", "kathi_rolls", "kofta", "kulfi", "lasagna",
+               "lobster_bisque", "lobster_roll_sandwich", "macaroni_and_cheese", "macarons", "miso_soup",
+               "momos", "mussels", "naan", "nachos", "omelette", "onion_rings", "oysters", "pad_thai",
+               "paella", "pakoda", "pancakes", "pani_puri", "panna_cotta", "panner_butter_masala",
+               "pav_bhaji", "peking_duck", "pho", "pizza", "pork_chop", "poutine", "prime_rib",
+               "pulled_pork_sandwich", "ramen", "ravioli", "red_velvet_cake", "risotto", "samosa",
+               "sashimi", "scallops", "seaweed_salad", "shrimp_and_grits", "spaghetti_bolognese",
+               "spaghetti_carbonara", "spring_rolls", "steak", "strawberry_shortcake", "sushi",
+               "tacos", "takoyaki", "tiramisu", "tuna_tartare", "vadapav", "waffles"]
+### 2. Model and transforms setup ###
+# Create the model and its transforms; num_classes is set to the number of class names
+vit, vit_transforms = create_vit_model(num_classes=len(class_names))
+# Load the saved weights onto the CPU from "vit_epoch_2.pth" (relative path, so it must be in the working directory). NOTE(review): torch.load unpickles the file - only ship a trusted checkpoint
+vit.load_state_dict(torch.load("vit_epoch_2.pth", map_location=torch.device("cpu")))
+### 3. Prediction function ###
+def predict(img) -> Tuple[Dict[str, float], float]:
+    """Classify a food image and report how long the prediction took.
+
+    Args:
+        img: The input image, as a PIL image or a NumPy array.
+
+    Returns:
+        A tuple of (dict mapping every class name to its softmax probability,
+        elapsed time in seconds rounded to 5 decimal places).
+
+    Raises:
+        gr.Error: If no image was supplied, or the input is neither a PIL
+            image nor a NumPy array.
+    """
+    # Accept NumPy arrays as well as PIL images
+    if isinstance(img, np.ndarray):
+        img = Image.fromarray(img)
+    # img is None when the form is submitted without an image; report that to
+    # the user instead of failing inside the transforms with an opaque error
+    if not isinstance(img, Image.Image):
+        raise gr.Error("Please upload an image before requesting a prediction.")
+    # Grayscale, palette and RGBA uploads are converted to RGB before the transforms run
+    if img.mode != "RGB":
+        img = img.convert("RGB")
+    # Start the timer (input normalisation above is not counted)
+    start_time = timer()
+    # Transform the image and add a batch dimension
+    batch = vit_transforms(img).unsqueeze(0)
+    # Run inference in eval mode with autograd disabled
+    vit.eval()
+    with torch.inference_mode():
+        pred_probs = torch.softmax(vit(batch), dim=1)
+    # Pair each class name with the probability at the same output index
+    pred_labels_and_probs = dict(zip(class_names, pred_probs[0].tolist()))
+    # Calculate prediction time
+    pred_time = round(timer() - start_time, 5)
+    return pred_labels_and_probs, pred_time
+### 4. Gradio app setup ###
+# Title, description, and article text passed to gr.Interface below
+title = "VisionBite 🍕🥩🍣"
+description = (
+    "A Vision Transformer (ViT-Base-16) model trained to classify images of food "
+    "into 121 distinct categories. The model uses a transformer-based architecture "
+    "to extract visual features and achieve accurate classification across diverse food items."
+)
+article = (
+    "Model trained on the [Food121 dataset](https://huggingface.co/datasets/ItsNotRohit/Food121) "
+    "with 95% top-5 prediction accuracy."
+)
+# Collect example images from the "examples" directory, if there is one.
+# isdir rather than exists: a plain file named "examples" would make os.listdir raise.
+if os.path.isdir("examples"):
+    # Sorted for a stable order (os.listdir order is arbitrary); extensions match case-insensitively
+    example_list = [
+        [os.path.join("examples", f)]
+        for f in sorted(os.listdir("examples"))
+        if f.lower().endswith((".jpg", ".jpeg", ".png"))
+    ]
+else:
+    example_list = []
+# Create the Gradio interface: a PIL image in; a top-5 label view and the prediction time out
 demo = gr.Interface(
     fn=predict,
     inputs=gr.Image(type="pil"),
+    outputs=[
+        gr.Label(num_top_classes=5, label="Top Predictions"),
+        gr.Number(label="Prediction time (s)")
+    ],
+    examples=example_list,
+    title=title,
+    description=description,
+    article=article
 )
+# Launch the app (runs whenever this module is executed or imported; there is no __main__ guard)
+demo.launch()