# NOTE(review): the lines below were page-scrape residue from the Hugging Face
# Spaces UI ("Spaces: Sleeping"); kept here as a comment so the file parses.
import gradio as gr
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BlipForConditionalGeneration,
    BlipProcessor,
)
from PIL import Image
import torch
import torch.nn.functional as F

# BLIP processor/model pair: turns an input image into a text caption.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# DistilRoBERTa emotion classifier: applied to the generated caption text,
# not to the image itself.
emotion_tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
emotion_model = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
def detect_emotion_caption(image):
    """Caption *image* with BLIP, then classify the caption's emotion.

    Args:
        image: A ``PIL.Image.Image`` as supplied by the Gradio image widget
            (``gr.Image(type="pil")``).

    Returns:
        A two-line string: the generated caption and the predicted
        emotion label.
    """
    # BLIP expects 3-channel RGB input; Gradio can hand us RGBA, palette,
    # or grayscale images, so normalize first.
    image = image.convert("RGB")

    # Step 1: generate a caption for the image with BLIP.
    inputs = blip_processor(image, return_tensors="pt")
    with torch.no_grad():
        output = blip_model.generate(**inputs)
    caption = blip_processor.decode(output[0], skip_special_tokens=True)

    # Step 2: classify the emotion of the caption text.
    inputs = emotion_tokenizer(caption, return_tensors="pt")
    with torch.no_grad():
        logits = emotion_model(**inputs).logits
    # dim=-1 (last axis) is the class axis regardless of batch shape.
    probs = F.softmax(logits, dim=-1)
    predicted_class = torch.argmax(probs, dim=-1).item()
    emotion = emotion_model.config.id2label[predicted_class]

    return f"Caption: {caption}\nDetected Emotion: {emotion}"
# Gradio UI: one image in, one text box out (caption + detected emotion).
iface = gr.Interface(
    fn=detect_emotion_caption,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="Image Emotion Detection",
    description="Upload an image. The app will generate a caption and detect the associated emotion.",
)

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()