# ImageToText / app.py
# Source: Hugging Face Space (jatinteamoxio) — commit 21f6863 "Update app.py"
import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import base64
import io
# Hugging Face model id used for both the processor and the captioning model.
_MODEL_ID = "Salesforce/blip-image-captioning-base"

# Load the BLIP processor (image preprocessing + tokenizer) and the
# conditional-generation model once at import time so requests reuse them.
processor = BlipProcessor.from_pretrained(_MODEL_ID)
model = BlipForConditionalGeneration.from_pretrained(_MODEL_ID)
# Function to process both standard images and base64 strings
def process_image(input_data):
    """Generate a caption for an image.

    Accepts any of:
      * a PIL image (the normal Gradio widget input),
      * a data-URI string ("data:image/...;base64,...."),
      * a raw base64 string (heuristically: any string longer than 100 chars).

    Returns the caption string on success, or an "Error ..." message string on
    failure — the string-on-error contract is kept so the Gradio Textbox
    output always receives plain text.
    """
    try:
        image = input_data
        if isinstance(input_data, str) and input_data.startswith("data:image"):
            # Data URI: strip the "data:image/...;base64," prefix.
            # maxsplit=1 keeps any (unexpected) later commas in the payload.
            base64_data = input_data.split(",", 1)[1]
            image_bytes = base64.b64decode(base64_data)
            image = Image.open(io.BytesIO(image_bytes))
        elif isinstance(input_data, str) and len(input_data) > 100:
            # Heuristic: a long bare string is likely base64 without a prefix.
            try:
                image_bytes = base64.b64decode(input_data)
                image = Image.open(io.BytesIO(image_bytes))
            except (ValueError, OSError):
                # binascii.Error (malformed base64) subclasses ValueError and
                # PIL.UnidentifiedImageError (not an image) subclasses OSError.
                # Catch those specifically instead of a bare `except:` so real
                # programming errors (NameError, etc.) are not swallowed.
                return "Error: Invalid base64 image format"

        # Normalize decoded images to RGB: BLIP expects 3-channel input, and
        # palette/RGBA/grayscale uploads would otherwise fail or mis-caption.
        if isinstance(image, Image.Image) and image.mode != "RGB":
            image = image.convert("RGB")

        # Generate the caption.
        inputs = processor(images=image, return_tensors="pt")
        out = model.generate(**inputs)
        caption = processor.decode(out[0], skip_special_tokens=True)
        return caption
    except Exception as e:
        # Top-level boundary: surface the error as text so the UI shows it.
        return f"Error processing image: {str(e)}"
# Assemble the Gradio interface. Components are named up front; the API name
# is set explicitly so the REST endpoint stays stable across Gradio versions.
_image_input = gr.Image(type="pil", label="Upload Image")
_caption_output = gr.Textbox(label="Image Caption")

demo = gr.Interface(
    fn=process_image,
    inputs=[_image_input],
    outputs=_caption_output,
    title="Image Captioning",
    description="Upload an image to get a caption",
    examples=[],
    flagging_mode="never",  # current name for the old allow_flagging option
    api_name="predict",
)

# .queue() is the modern replacement for the removed enable_queue launch flag.
demo.queue().launch(share=True)