Spaces:

jatinteamoxio
/

ImageToText

Build error

File size: 1,994 Bytes

3cde9b2
 
 
4bebfad
 
0e12d1c
 
3cde9b2
 
0e12d1c
de09b8d
 
4bebfad
de09b8d
 
 
 
 
 
 
 
 
21f6863
de09b8d
 
 
 
 
 
 
4bebfad
 
 
 
 
 
0e12d1c
a5e2f7b
de09b8d
 
 
 
 
 
 
 
 
21f6863
a5e2f7b
de09b8d
4bebfad
21f6863

import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import base64
import io

# Checkpoint name shared by the processor and the model, so the two cannot
# drift apart if the checkpoint is ever swapped.
_CHECKPOINT = "Salesforce/blip-image-captioning-base"

# Load processor and model from the same checkpoint
processor = BlipProcessor.from_pretrained(_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_CHECKPOINT)

# Single message for every base64 decoding failure, so both base64 input
# styles (with and without a data-URI prefix) report errors the same way.
_INVALID_BASE64_MESSAGE = "Error: Invalid base64 image format"


def _decode_base64_image(encoded):
    """Decode a base64 string and open the decoded bytes as a PIL image.

    Raises:
        ValueError: if ``encoded`` is not valid base64 (``binascii.Error``
            is a ``ValueError`` subclass) or contains non-ASCII characters.
        OSError: if PIL cannot open the decoded bytes as an image.
    """
    image_bytes = base64.b64decode(encoded)
    return Image.open(io.BytesIO(image_bytes))


# Function to process both standard images and base64 strings
def process_image(input_data):
    """Return a caption for an image, or an ``"Error..."`` string on failure.

    Args:
        input_data: One of
            * a ``data:image...`` URI whose base64 payload follows the first
              comma,
            * a bare base64 string (any string longer than 100 characters is
              treated as one),
            * anything else, which is handed to the processor unchanged
              (the Gradio input below is configured to supply a PIL image).

    Returns:
        The generated caption, or a human-readable error message. This
        function never raises; failures are reported in the returned string.
    """
    # An empty input would otherwise surface as an opaque library error
    # from the processor.
    if input_data is None:
        return "Error: No image provided"

    try:
        encoded = None
        if isinstance(input_data, str):
            if input_data.startswith("data:image"):
                # Everything after the first comma is the base64 payload.
                _, separator, encoded = input_data.partition(",")
                if not separator:
                    return _INVALID_BASE64_MESSAGE
            elif len(input_data) > 100:
                # Likely a base64 string without prefix
                encoded = input_data

        if encoded is None:
            # Standard image input
            image = input_data
        else:
            try:
                image = _decode_base64_image(encoded)
            except (ValueError, OSError):
                # Only decoding / image-parsing failures are expected here;
                # anything else falls through to the generic handler below.
                return _INVALID_BASE64_MESSAGE

        # Generate caption
        inputs = processor(images=image, return_tensors="pt")
        out = model.generate(**inputs)
        return processor.decode(out[0], skip_special_tokens=True)
    except Exception as e:
        # Top-level boundary for the UI/API: report instead of crashing.
        return f"Error processing image: {str(e)}"

# Build the input/output components first, then assemble the demo around them
image_input = gr.Image(type="pil", label="Upload Image")
caption_output = gr.Textbox(label="Image Caption")

# Create the demo with explicit API name
demo = gr.Interface(
    fn=process_image,
    inputs=[image_input],
    outputs=caption_output,
    title="Image Captioning",
    description="Upload an image to get a caption",
    examples=[],
    # flagging_mode is used here instead of allow_flagging
    flagging_mode="never",
    # Explicitly set the API name
    api_name="predict",
)

# Turn on the queue (used instead of enable_queue), then start the app
demo.queue()
demo.launch(share=True)