import gradio as gr
from transformers import pipeline

# Load the BLIP image-to-text pipeline once at module import so the model
# is not reloaded on every request.
pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


def launch(image):
    """Generate a caption for a single image.

    Args:
        image: A PIL image (Gradio delivers it as such because the input
            component is ``gr.Image(type='pil')``).

    Returns:
        str: The caption produced by the BLIP captioning model.
    """
    # The pipeline returns a list of dicts; take the first candidate's text.
    out = pipe(image)
    return out[0]['generated_text']


# Example images paired with their expected captions. Only the image paths
# are forwarded to Gradio below; the captions are kept here for reference.
examples = [
    ["example1.jpeg", "a dog swimming in the ocean"],   # Example 1
    ["example2.png", "a fairy sitting on a tree branch"]  # Example 2
]

# Build the Gradio interface: image in, caption text out.
iface = gr.Interface(
    fn=launch,
    inputs=gr.Image(type='pil'),  # Input is an image
    outputs="text",               # Output is a text description
    title="Image Captioning with BLIP",
    description="This application uses the BLIP image-captioning model to generate descriptions for the images you upload. "
                "Simply upload an image, and the model will generate a caption describing the content of the image. "
                "You can also try some pre-loaded examples below.",
    examples=[example[:1] for example in examples]  # Only include image paths for Gradio
)

# Guard the launch so importing this module does not start the server.
if __name__ == "__main__":
    iface.launch()