Spaces:
Sleeping
Sleeping
Update App to compare to Zero Shot model
Browse files- app.py +34 -7
- requirements.txt +3 -1
app.py
CHANGED
|
@@ -1,11 +1,35 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from transformers import pipeline
|
|
|
|
|
|
|
| 3 |
|
| 4 |
classifier = pipeline("image-classification", model="Skorm/food11-vit")
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
# Example image paths
|
| 11 |
examples = [
|
|
@@ -21,9 +45,12 @@ examples = [
|
|
| 21 |
iface = gr.Interface(
|
| 22 |
fn=classify_food,
|
| 23 |
inputs=gr.Image(type="filepath"),
|
| 24 |
-
outputs=
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
| 27 |
examples=examples
|
| 28 |
)
|
| 29 |
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from transformers import pipeline, CLIPProcessor, CLIPModel
|
| 3 |
+
from PIL import Image
|
| 4 |
+
import torch
|
| 5 |
|
| 6 |
# Fine-tuned ViT food classifier (the model this Space was built around).
classifier = pipeline("image-classification", model="Skorm/food11-vit")

# Load CLIP model — zero-shot baseline to compare against the fine-tuned ViT.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

# Define CLIP labels: text prompts scored against the image for zero-shot
# classification (presumably mirrors the Food-11 category set — TODO confirm
# against the ViT model's label config).
clip_labels = [
    "bread", "dairy product", "dessert", "egg", "fried food",
    "meat", "noodles or pasta", "rice", "seafood", "soup", "vegetables or fruits"
]
|
| 17 |
+
|
| 18 |
+
def classify_food(image_path):
    """Classify a food image with both models and return their score dicts.

    Args:
        image_path: Filesystem path to the image (Gradio supplies this
            because the input component uses type="filepath").

    Returns:
        Tuple of two dicts mapping label -> probability rounded to 4 places:
        (fine-tuned ViT predictions, CLIP zero-shot predictions) — matching
        the two output Label components of the Interface.
    """
    image = Image.open(image_path)

    # ----- ViT prediction -----
    # The pipeline handles its own loading/preprocessing, so pass the path.
    vit_results = classifier(image_path)
    vit_output = {result["label"]: round(result["score"], 4) for result in vit_results}

    # ----- CLIP zero-shot prediction -----
    # Inference only: disable autograd so the forward pass does not build a
    # gradient graph (saves memory and time on every request).
    with torch.no_grad():
        inputs = clip_processor(text=clip_labels, images=image, return_tensors="pt", padding=True)
        outputs = clip_model(**inputs)
        probs = outputs.logits_per_image.softmax(dim=1)[0]

    clip_output = {label: round(float(score), 4) for label, score in zip(clip_labels, probs)}

    return vit_output, clip_output
|
| 33 |
|
| 34 |
# Example image paths
|
| 35 |
examples = [
|
|
|
|
| 45 |
# Gradio UI: one image input, two side-by-side label outputs so the user can
# compare the fine-tuned ViT model against zero-shot CLIP on the same image.
iface = gr.Interface(
    fn=classify_food,
    inputs=gr.Image(type="filepath"),
    outputs=[
        gr.Label(num_top_classes=3, label="ViT (Fine-tuned) Prediction"),
        gr.Label(num_top_classes=3, label="CLIP Zero-Shot Prediction")
    ],
    title="🍽️ Food Classification with ViT and Zero-Shot CLIP",
    description="Upload a food image. The app compares predictions between your fine-tuned ViT model and zero-shot CLIP.",
    examples=examples
)
|
| 56 |
|
requirements.txt
CHANGED
|
@@ -1,2 +1,4 @@
|
|
| 1 |
transformers
|
| 2 |
-
torch
|
|
|
|
|
|
|
|
|
| 1 |
transformers
|
| 2 |
+
torch
|
| 3 |
+
gradio
|
| 4 |
+
Pillow
|