# Llava_caption / app.py
# Hugging Face Space by K00B404 — commit ae61a84 ("Create app.py")
from functools import lru_cache
from io import BytesIO

import gradio as gr
import requests
from PIL import Image
from transformers import pipeline
# Initialize the image-to-text pipeline
@lru_cache(maxsize=1)
def load_model():
    """Build (once) and return the LLaVA image-text-to-text pipeline.

    The result is cached so repeated caption requests reuse the already
    loaded 7B model instead of re-initialising it on every button click.

    Returns:
        A transformers ``image-text-to-text`` pipeline wrapping
        ``llava-hf/llava-1.5-7b-hf``.
    """
    return pipeline("image-text-to-text", model="llava-hf/llava-1.5-7b-hf")
# Function to handle image captioning
def caption_image(image, question=None):
    """Caption an image, or answer a specific question about it.

    Args:
        image: PIL image supplied by the Gradio ``Image`` component.
        question: Optional question about the image. When blank/None,
            a generic description prompt is used instead.

    Returns:
        The model's text response as a string.
    """
    pipe = load_model()
    # Fall back to a generic prompt when no question was asked.
    if question and question.strip():
        prompt = question
    else:
        prompt = "Describe this image in detail."
    messages = [
        {
            "role": "user",
            "content": [
                # "image" is the documented key for in-memory PIL images;
                # "url" is meant for remote addresses.
                {"type": "image", "image": image},
                {"type": "text", "text": prompt},
            ],
        },
    ]
    # Generate caption
    result = pipe(text=messages, max_new_tokens=150)
    generated = result[0]["generated_text"]
    # With chat-style input the pipeline returns the whole conversation as a
    # list of role/content messages; the assistant's reply is the last entry.
    if isinstance(generated, list):
        return generated[-1]["content"]
    return generated
# Function to handle example images via URL
def process_example_url(url):
    """Download an example image from *url* and return it as a PIL image.

    Args:
        url: HTTP(S) address of an image file.

    Returns:
        The decoded ``PIL.Image.Image``.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        requests.Timeout: if the download exceeds the 10-second budget.
    """
    # Bound the request so a dead host cannot hang the UI indefinitely.
    response = requests.get(url, timeout=10)
    # Fail loudly on HTTP errors instead of trying to decode an error page.
    response.raise_for_status()
    return Image.open(BytesIO(response.content))
# Create Gradio interface
with gr.Blocks(title="Image Captioning App") as demo:
    gr.Markdown("# Image Captioning with LLaVA")
    gr.Markdown("Upload an image and optionally ask a specific question about it.")
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Image")
            question_input = gr.Textbox(label="Question (optional)", placeholder="Ask a specific question about the image or leave blank for general description")
            caption_button = gr.Button("Generate Caption")
        with gr.Column():
            caption_output = gr.Textbox(label="Generated Caption", lines=7)
    # Add examples: each entry is [image URL, question] matching `inputs`.
    # Gradio downloads URL strings into the Image component automatically.
    example_images = [
        ["https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg",
         "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"],
        ["https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/1920px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
         ""]
    ]
    gr.Examples(
        examples=example_images,
        inputs=[image_input, question_input],
        # Examples must run the actual prediction function with the same
        # input/output wiring as the button (the previous one-argument
        # helper did not match the two `inputs` and had no output).
        fn=caption_image,
        outputs=caption_output,
        # Caching would run the 7B model on every example at startup; keep off.
        cache_examples=False,
    )
    # Set up the button click event
    caption_button.click(
        fn=caption_image,
        inputs=[image_input, question_input],
        outputs=caption_output,
    )
    gr.Markdown("### How to use:")
    gr.Markdown("1. Upload an image by clicking the upload box or drag-and-drop")
    gr.Markdown("2. Optionally type a specific question about the image")
    gr.Markdown("3. Click 'Generate Caption' to get the result")
    gr.Markdown("4. Try the examples below to see how it works")

# Launch the app
if __name__ == "__main__":
    demo.launch()