# app.py — Image Emotion Detection demo.
# Generates a caption for an uploaded image with BLIP, then classifies the
# caption's emotion with a DistilRoBERTa emotion model, served via Gradio.
import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration, AutoTokenizer, AutoModelForSequenceClassification
from PIL import Image
import torch
import torch.nn.functional as F
# Load BLIP model and processor for image captioning
# (weights are fetched from the Hugging Face Hub on first run and cached locally)
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
# Load emotion classification model and tokenizer
# (7-class English emotion classifier; labels come from the model config's id2label)
emotion_tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
emotion_model = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
def detect_emotion_caption(image):
    """Caption *image* with BLIP, then classify the caption's emotion.

    Args:
        image: A PIL image supplied by the Gradio ``Image`` input.

    Returns:
        A two-line string reporting the generated caption and the
        predicted emotion label.
    """
    # Stage 1: image -> caption via BLIP (inference only, so no gradients).
    vision_inputs = blip_processor(image, return_tensors="pt")
    with torch.no_grad():
        generated_ids = blip_model.generate(**vision_inputs)
    caption = blip_processor.decode(generated_ids[0], skip_special_tokens=True)

    # Stage 2: caption -> emotion label via the text classifier.
    text_inputs = emotion_tokenizer(caption, return_tensors="pt")
    with torch.no_grad():
        emotion_logits = emotion_model(**text_inputs).logits
    probabilities = F.softmax(emotion_logits, dim=1)
    label_id = torch.argmax(probabilities, dim=1).item()
    emotion = emotion_model.config.id2label[label_id]

    return f"Caption: {caption}\nDetected Emotion: {emotion}"
# Wire the pipeline into a simple Gradio UI: one image in, one text box out.
iface = gr.Interface(
    fn=detect_emotion_caption,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="Image Emotion Detection",
    description="Upload an image. The app will generate a caption and detect the associated emotion.",
)
# Start the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    iface.launch()