# Hugging Face Space: image captioning + emotion analysis demo.
# (The original "Spaces / Sleeping" lines were web-page scrape residue, not code.)
import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration, pipeline
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image as PILImage

# Load the BLIP image-captioning model and its matching processor.
caption_model_name = "Salesforce/blip-image-captioning-large"
caption_processor = BlipProcessor.from_pretrained(caption_model_name)
caption_model = BlipForConditionalGeneration.from_pretrained(caption_model_name)

# Load the GoEmotions text-classification pipeline for emotion scoring.
# NOTE(review): `return_all_scores=True` is deprecated in recent transformers
# releases in favour of `top_k=None`; it is kept here because the downstream
# code indexes the legacy nested output shape (results[0] is a list of
# {'label', 'score'} dicts) — confirm output shape before migrating.
emotion_model_name = "SamLowe/roberta-base-go_emotions"
emotion_classifier = pipeline("text-classification", model=emotion_model_name, return_all_scores=True)
def generate_caption_and_analyze_emotions(image=None, text=None):
    """Caption an image (or take raw text) and visualize its emotion scores.

    Args:
        image: Optional image; when given, a caption is generated with BLIP
            and that caption is analyzed.
        text: Optional string, analyzed directly when no image is supplied.

    Returns:
        Tuple of (caption_output, sentiment_output, vis_image) on success,
        or (error_message, "", None) if anything goes wrong.
    """
    try:
        if image is not None:
            # Preprocess the image and generate a caption with BLIP.
            caption_inputs = caption_processor(images=image, return_tensors="pt")
            caption_ids = caption_model.generate(**caption_inputs)
            decoded_caption = caption_processor.decode(caption_ids[0], skip_special_tokens=True)
        elif text:
            decoded_caption = text
        else:
            # Fix: the original fed None/"" straight into the classifier when
            # both inputs were missing; fail gracefully instead.
            return "Please upload an image or enter some text.", "", None

        # Score every emotion label for the caption or provided text.
        results = emotion_classifier(decoded_caption)
        labels = [result['label'] for result in results[0]]
        scores = [result['score'] for result in results[0]]

        # Bar chart of all emotion scores.
        plt.figure(figsize=(10, 5))
        plt.bar(labels, scores, color='skyblue')
        plt.xlabel('Emotions')
        plt.ylabel('Scores')
        plt.title('Emotion Analysis')
        plt.xticks(rotation=45)
        plt.tight_layout()

        # Save the plot, then reload it as a PIL image for the Gradio output.
        plt_path = "emotion_visualization.png"
        plt.savefig(plt_path)
        plt.close()
        vis_image = PILImage.open(plt_path)

        # Fix: the original used results[0][0]['label'], which is merely the
        # first label in the model's output order — not the highest-scoring
        # emotion. Select the top label by score instead.
        top_result = max(results[0], key=lambda result: result['score'])
        sentiment_label = top_result['label']
        if sentiment_label == 'neutral':
            sentiment_text = "Sentiment of the text is"
        else:
            sentiment_text = "Sentiment of the text shows"

        caption_output = f"Caption: '{decoded_caption}'"
        sentiment_output = f"{sentiment_text} {sentiment_label}."
        return caption_output, sentiment_output, vis_image
    except Exception as e:
        # UI boundary: surface the error in the output box instead of crashing
        # the Gradio app.
        return f"An error occurred: {e}", "", None
# --- Gradio UI wiring ------------------------------------------------------
image_input = gr.Image(label="Upload an image")
text_input = gr.Textbox(label="Or enter text", lines=2)
outputs = [
    gr.Textbox(label="Generated Caption"),
    gr.Textbox(label="Sentiment Analysis"),
    gr.Image(label="Emotion Visualization"),
]

# Connect the captioning + emotion-analysis function to the interface.
app = gr.Interface(
    fn=generate_caption_and_analyze_emotions,
    inputs=[image_input, text_input],
    outputs=outputs,
)

# Launch only when run as a script (Spaces imports the module).
if __name__ == "__main__":
    app.launch()