Spaces:
Runtime error
Runtime error
import functools
from io import BytesIO

import gradio as gr
import requests
from PIL import Image
from transformers import pipeline
# Initialize the image-to-text pipeline
@functools.lru_cache(maxsize=1)
def load_model():
    """Return the LLaVA image-text-to-text pipeline, building it only once.

    The result is memoized so that repeated calls (one per caption request)
    reuse the same pipeline object instead of re-instantiating the model on
    every call.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``"image-text-to-text"`` task with ``llava-hf/llava-1.5-7b-hf``.
    """
    return pipeline("image-text-to-text", model="llava-hf/llava-1.5-7b-hf")
# Function to handle image captioning
def caption_image(image, question=None):
    """Describe an image, or answer a question about it, using the pipeline.

    Args:
        image: The image to caption. The Gradio input feeding this function
            is declared with ``type="pil"``; ``None`` means nothing was
            uploaded.
        question: Optional question about the image. If it is ``None``,
            empty, or whitespace-only, a generic "describe in detail" prompt
            is used instead. A non-blank question is passed through as typed.

    Returns:
        The text generated by the model for the single user turn.

    Raises:
        gr.Error: If ``image`` is ``None``, so the UI shows a readable
            message instead of a failure from inside the model call.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")

    pipe = load_model()

    # Only the text part of the message depends on whether a question was
    # supplied, so choose the prompt first and build the message once.
    has_question = bool(question and question.strip())
    prompt = question if has_question else "Describe this image in detail."

    messages = [
        {
            "role": "user",
            "content": [
                # The value is an in-memory image object rather than a URL,
                # so it is passed under the "image" key.
                {"type": "image", "image": image},
                {"type": "text", "text": prompt},
            ],
        },
    ]

    # Generate caption. return_full_text=False asks the pipeline for only the
    # newly generated text rather than the input conversation plus the reply.
    result = pipe(text=messages, max_new_tokens=150, return_full_text=False)
    return result[0]["generated_text"]
# Function to handle example images via URL
def process_example_url(url):
    """Download the image at ``url`` and open it with PIL.

    Args:
        url: HTTP(S) address of the image to fetch.

    Returns:
        The image object produced by ``Image.open`` on the downloaded bytes.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: For other request failures, including
            the timeout below.
    """
    # A timeout keeps a stalled download from hanging the app indefinitely.
    response = requests.get(url, timeout=30)
    # Fail on HTTP errors here rather than handing an error page to PIL.
    response.raise_for_status()
    return Image.open(BytesIO(response.content))
# Create Gradio interface
with gr.Blocks(title="Image Captioning App") as demo:
    gr.Markdown("# Image Captioning with LLaVA")
    gr.Markdown("Upload an image and optionally ask a specific question about it.")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Image")
            question_input = gr.Textbox(
                label="Question (optional)",
                placeholder="Ask a specific question about the image or leave blank for general description",
            )
            caption_button = gr.Button("Generate Caption")
        with gr.Column():
            caption_output = gr.Textbox(label="Generated Caption", lines=7)

    # Add examples: each row is [image URL, question], matching `inputs` below.
    example_images = [
        [
            "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg",
            "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud",
        ],
        [
            "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/1920px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
            "",
        ],
    ]
    # Cached examples need both a function and an output component, and the
    # function must accept one argument per entry in `inputs`. The captioning
    # function takes (image, question) and writes to the caption textbox, so
    # it is used here, the same wiring as the button.
    gr.Examples(
        examples=example_images,
        inputs=[image_input, question_input],
        outputs=caption_output,
        fn=caption_image,
        cache_examples=True,
    )

    # Set up the button click event
    caption_button.click(
        fn=caption_image,
        inputs=[image_input, question_input],
        outputs=caption_output,
    )

    gr.Markdown("### How to use:")
    gr.Markdown("1. Upload an image by clicking the upload box or drag-and-drop")
    gr.Markdown("2. Optionally type a specific question about the image")
    gr.Markdown("3. Click 'Generate Caption' to get the result")
    # The examples are created before this help text, so they appear above it.
    gr.Markdown("4. Try the examples above to see how it works")

# Launch the app
if __name__ == "__main__":
    demo.launch()