# app.py
import os
import io
import warnings

from PIL import Image
import gradio as gr
from transformers import pipeline

# Suppress warnings
warnings.filterwarnings("ignore", message=".*Using the model-agnostic default max_length.*")

# Load BLIP image captioning pipeline
captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


# Function to generate caption using the pipeline
def generate_caption(image: Image.Image):
    try:
        # Convert image to RGB just in case
        image = image.convert("RGB")
        # Generate caption
        caption = captioner(image)[0]["generated_text"]
        return caption
    except Exception as e:
        return f"An error occurred: {str(e)}"


# Predefined sample images
def get_sample_images():
    """
    Returns a list of predefined sample images in the CreatureCaptures directory.
    """
    sample_dir = "CreatureCaptures"  # Ensure this directory exists and contains sample images
    try:
        return [
            os.path.join(sample_dir, file)
            for file in os.listdir(sample_dir)
            if file.lower().endswith((".png", ".jpg", ".jpeg"))
        ]
    except FileNotFoundError:
        return []


# Load sample images
sample_images = get_sample_images()

# Gradio interface
demo = gr.Interface(
    fn=generate_caption,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.Textbox(label="Generated Caption"),
    examples=sample_images,
    title="Image Captioning App",
    description=(
        "Upload an image or use one of the predefined samples to generate a caption. "
        "This app uses `Salesforce/blip-image-captioning-base` locally via Hugging Face Transformers."
    ),
    flagging_mode="never",
)

if __name__ == "__main__":
    demo.launch()