import gradio as gr
from transformers import pipeline

# Load the BLIP image-to-text pipeline once at module import so the model
# is not reloaded on every request.
pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


def launch(image):
    """Generate a caption for a single image.

    Args:
        image: A PIL image (Gradio delivers it as such because the input
            component is ``gr.Image(type='pil')``).

    Returns:
        str: The caption produced by the BLIP captioning model.
    """
    # The pipeline returns a list of dicts; take the first candidate's text.
    out = pipe(image)
    return out[0]['generated_text']


# Example images paired with their expected captions. Only the image paths
# are forwarded to Gradio below; the captions are kept here for reference.
examples = [
    ["example1.jpeg", "a dog swimming in the ocean"],   # Example 1
    ["example2.png", "a fairy sitting on a tree branch"]  # Example 2
]

# Build the Gradio interface: image in, caption text out.
iface = gr.Interface(
    fn=launch,
    inputs=gr.Image(type='pil'),  # Input is an image
    outputs="text",               # Output is a text description
    title="Image Captioning with BLIP",
    description="This application uses the BLIP image-captioning model to generate descriptions for the images you upload. "
                "Simply upload an image, and the model will generate a caption describing the content of the image. "
                "You can also try some pre-loaded examples below.",
    examples=[example[:1] for example in examples]  # Only include image paths for Gradio
)

# Guard the launch so importing this module does not start the server.
if __name__ == "__main__":
    iface.launch()