Spaces:
Build error
Build error
File size: 1,609 Bytes
39fced5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import torch
import gradio as gr
# Load the CLIP checkpoint and its matching preprocessor once at start-up,
# placing the model on the GPU when one is available.
_CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"

device = "cuda" if torch.cuda.is_available() else "cpu"
processor = CLIPProcessor.from_pretrained(_CLIP_CHECKPOINT)
model = CLIPModel.from_pretrained(_CLIP_CHECKPOINT).to(device)
model.eval()  # inference mode: the model is only used for prediction here
# Text prompts the image is compared against; one prompt per candidate emotion.
emotion_labels = [
    "a happy person",
    "a sad person",
    "an angry person",
    "a surprised person",
    "a fearful person",
    "a disgusted person",
    "a neutral person",
    "a contemptuous person",
    "an unknown emotion",
]
# Prediction function used as the Gradio callback
def zero_shot_predict(image):
    """Classify the emotion shown in an image with zero-shot CLIP.

    The image is scored against every prompt in ``emotion_labels``; the
    image-text logits are turned into probabilities with a softmax and the
    highest-scoring prompts are returned.

    Args:
        image: A PIL image, or ``None`` (e.g. when the form is submitted
            without an image).

    Returns:
        A ``(best_emotion, top)`` tuple. ``best_emotion`` is the prompt with
        the highest probability and ``top`` is a list of up to three
        ``(prompt, "xx.xx%")`` tuples in descending order of probability.
        When ``image`` is ``None`` the result is ``("", [])``.
    """
    # Without this guard, ``image.convert`` raises AttributeError on None.
    if image is None:
        return "", []

    image = image.convert("RGB")
    inputs = processor(
        text=emotion_labels,
        images=image,
        return_tensors="pt",
        padding=True,
    ).to(device)

    with torch.no_grad():
        outputs = model(**inputs)

    logits_per_image = outputs.logits_per_image  # image-text similarities
    probs = logits_per_image.softmax(dim=1)  # probabilities over the prompts

    # Never request more entries than there are prompts.
    k = min(3, len(emotion_labels))
    top_prob, top_idx = torch.topk(probs, k)

    # A single image was passed in, so only row 0 is used. Convert to plain
    # Python ints/floats before indexing and formatting.
    indices = top_idx[0].tolist()
    scores = top_prob[0].tolist()

    top = [
        (emotion_labels[i], f"{p * 100:.2f}%")
        for i, p in zip(indices, scores)
    ]
    best_emotion = emotion_labels[indices[0]]
    return best_emotion, top
# Gradio UI: one image input; outputs are the best label as text plus a table
# of the top predictions.
image_input = gr.Image(type="pil")
prediction_outputs = [
    "text",
    gr.Dataframe(headers=["Emotion", "Confidence (%)"]),
]

interface = gr.Interface(
    fn=zero_shot_predict,
    inputs=image_input,
    outputs=prediction_outputs,
    title="Zero-Shot Emotion Recognition",
    description="Erkenne Emotionen ohne Training — einfach mit CLIP!",
)

interface.launch()
|