import gradio as gr
from transformers import pipeline
|
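# Custom ViT model trained for food classification, loaded from the Hugging Face Hub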
vit_classifier = pipeline("image-classification", model="chrisis2/vit-food-classification-chrisis2") |
|
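# CLIP model used for zero-shot classification over the candidate labels below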
clip_detector = pipeline(model="openai/clip-vit-large-patch14", task="zero-shot-image-classification") |
|
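# Candidate labels passed to the zero-shot CLIP classifier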
food_labels = [
    "Baked Potato", "Crispy Chicken", "Donut", "Fries", "Hot Dog", "Sandwich", "Taco", "Taquito",
    "apple_pie", "burger", "butter_naan", "chai", "chapati", "cheesecake", "chicken_curry",
    "chole_bhature", "dal_makhani", "dhokla", "fried_rice", "ice_cream", "idli", "jalebi",
    "kaathi_rolls", "kadai_paneer", "kulfi", "masala_dosa", "momos", "omelette", "paani_puri",
    "pakode", "pav_bhaji", "pizza", "samosa", "sushi"
]
|
|
def classify_food(image): |
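    """Classify a food image with both models and return their per-label scores."""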
    # Predictions from the trained ViT model
    vit_results = vit_classifier(image)
    vit_output = {result['label']: result['score'] for result in vit_results}
|
    # Zero-shot predictions from CLIP over the candidate labels
    clip_results = clip_detector(image, candidate_labels=food_labels)
    clip_output = {result['label']: result['score'] for result in clip_results}
|
    return {"Trained ViT Model": vit_output, "CLIP Zero-Shot": clip_output}
|
|
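# Example images offered below the upload widget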
example_images = [
    ["gradio-food-app/example_images/burger.jpg.jpg"],
    ["gradio-food-app/example_images/pizza.jpg.jpg"],
    ["gradio-food-app/example_images/sushi.jpg.jpg"],
]
|
|
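# Gradio interface: one image in, a JSON comparison of both models out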
iface = gr.Interface(
    fn=classify_food,
    inputs=gr.Image(type="filepath"),
    outputs=gr.JSON(),
    title="Food Classification Comparison",
    description="Upload an image of a food item and compare the results of a trained ViT model with a zero-shot CLIP model.",
    examples=example_images,
)
|
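# Starts a local server; pass share=True to launch() for a temporary public link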
iface.launch() |