Spaces:
Sleeping
Sleeping
| import os | |
| import io | |
| import IPython.display | |
| from PIL import Image | |
| import base64 | |
| from transformers import pipeline, AutoTokenizer | |
| import requests | |
| import gradio as gr | |
# Module-level image-to-text pipeline backed by the BLIP base captioning
# checkpoint; created once at import time and reused by generate_caption().
get_completion = pipeline(
    task="image-to-text",
    model="Salesforce/blip-image-captioning-base",
)
def generate_caption(base64_image):
    """Caption a base64-encoded image with the module-level BLIP pipeline.

    Args:
        base64_image: Base64 text (or bytes) holding an encoded image file.

    Returns:
        A dictionary. If the pipeline returns a plain string it is wrapped as
        ``{'generated_text': <string>}``; if it returns a non-empty list, the
        first element is returned as-is (presumably a dict carrying a
        ``'generated_text'`` key -- confirm against the pipeline output);
        anything else yields ``{'generated_text': None}``.
    """
    # Turn the base64 payload back into an in-memory PIL image.
    raw_bytes = base64.b64decode(base64_image)
    picture = Image.open(io.BytesIO(raw_bytes))

    # Run the captioning pipeline on the decoded image.
    output = get_completion(picture)

    # Normalise the pipeline output so callers always receive a dictionary.
    if isinstance(output, str):
        return {'generated_text': output}
    if output and isinstance(output, list):
        return output[0]
    return {'generated_text': None}
def image_to_base64_str(pil_image):
    """Serialize an image to PNG and return the bytes as base64 text.

    Args:
        pil_image: Any object exposing ``save(file_obj, format=...)``; the
            caller in this file passes a PIL image.

    Returns:
        The PNG bytes encoded with base64 and decoded to a UTF-8 ``str``.
    """
    with io.BytesIO() as png_buffer:
        # Let the image write itself into the in-memory buffer as PNG.
        pil_image.save(png_buffer, format='PNG')
        encoded = base64.b64encode(png_buffer.getvalue())
    return encoded.decode('utf-8')
def captioner(image):
    """Return a caption for an uploaded image (callback for the Gradio UI).

    The image is serialized to a base64 PNG string with
    ``image_to_base64_str`` and handed to ``generate_caption``; the caption is
    the value stored under ``'generated_text'`` in the returned dictionary.

    Args:
        image: The image delivered by the ``gr.Image(type="pil")`` input.
            Gradio passes ``None`` when no image was provided.

    Returns:
        The ``'generated_text'`` value of the captioning result.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    if image is None:
        # Without this guard the call below fails inside ``image.save`` with
        # an opaque AttributeError; gr.Error shows a readable message instead.
        raise gr.Error("Please upload an image before requesting a caption.")

    base64_image = image_to_base64_str(image)
    result = generate_caption(base64_image)
    print(result)  # Debugging output: shows the structure of the result

    # The caption lives under the 'generated_text' key of the result dict.
    caption_text = result['generated_text']
    print(caption_text)
    return caption_text
# Gradio UI: a single image upload wired to captioner(), with the resulting
# caption shown in a text box. Flagging is turned off and three example
# images are offered by file name.
demo = gr.Interface(
    fn=captioner,
    inputs=[gr.Image(label="Upload image", type="pil")],
    outputs=[gr.Textbox(label="Caption")],
    title="Image Captioning with BLIP",
    description="Caption any image using the BLIP model",
    allow_flagging="never",
    examples=["christmas_dog.jpeg", "bird_flight.jpeg", "cow.jpeg"],
)

# share=True and server_port are intentionally left out for Hugging Face Spaces.
demo.launch()