import io
import base64

from PIL import Image
from transformers import pipeline
import gradio as gr

# Load the BLIP image-captioning model once at startup.
get_completion = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
def generate_caption(base64_image):
    # Decode the base64 string back into a PIL image.
    image_data = base64.b64decode(base64_image)
    image = Image.open(io.BytesIO(image_data))
    # Run the BLIP captioning pipeline on the image.
    caption_result = get_completion(image)
    # Normalize the output so callers always receive a dictionary.
    if isinstance(caption_result, str):
        return {'generated_text': caption_result}
    elif caption_result and isinstance(caption_result, list):
        return caption_result[0]
    else:
        return {'generated_text': None}
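
# For reference, the transformers image-to-text pipeline normally returns a
# list of dicts, e.g. [{'generated_text': 'a dog wearing a santa hat'}];
# the caption text shown here is illustrative only.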
def image_to_base64_str(pil_image):
    # Serialize the PIL image to PNG bytes, then base64-encode them as a UTF-8 string.
    byte_arr = io.BytesIO()
    pil_image.save(byte_arr, format='PNG')
    return base64.b64encode(byte_arr.getvalue()).decode('utf-8')
def captioner(image):
    # Encode the uploaded PIL image to base64, caption it, and return the text.
    base64_image = image_to_base64_str(image)
    result = generate_caption(base64_image)
    return result['generated_text']
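
# Quick local sanity check (hypothetical; assumes one of the example images
# listed below sits next to this script):
#   print(captioner(Image.open("christmas_dog.jpeg")))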
demo = gr.Interface(fn=captioner,
                    inputs=[gr.Image(label="Upload image", type="pil")],
                    outputs=[gr.Textbox(label="Caption")],
                    title="Image Captioning with BLIP",
                    description="Caption any image using the BLIP model",
                    allow_flagging="never",
                    examples=["christmas_dog.jpeg", "bird_flight.jpeg", "cow.jpeg"])
demo.launch()  # Hugging Face Spaces supplies the server config, so no share/port arguments are needed.
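
# When running locally rather than on Spaces, a temporary public link can be
# requested instead, e.g.:
#   demo.launch(share=True)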