import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import base64
import io

# Load the BLIP captioning processor and model once at startup.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")


# Function to process both standard images and base64 strings
def process_image(input_data):
    """Generate a caption for an image.

    Accepts either a PIL image (the normal Gradio widget input) or a
    base64-encoded image string (with or without a ``data:image/...;base64,``
    prefix), as may arrive via API calls.

    Returns the caption string, or an ``"Error..."`` message string on failure.
    """
    try:
        if input_data is None:
            # Gradio passes None when no image was uploaded.
            return "Error processing image: no image provided"

        # Check if input is a base64 string
        if isinstance(input_data, str) and input_data.startswith("data:image"):
            # Data-URL form: strip the "data:image/...;base64," prefix.
            base64_data = input_data.split(",", 1)[1]
            image_bytes = base64.b64decode(base64_data)
            image = Image.open(io.BytesIO(image_bytes))
        elif isinstance(input_data, str) and len(input_data) > 100:
            # Heuristic: a long bare string is likely base64 without a prefix.
            try:
                image_bytes = base64.b64decode(input_data)
                image = Image.open(io.BytesIO(image_bytes))
            except (ValueError, OSError):
                # binascii.Error (bad base64) is a ValueError subclass;
                # PIL raises an OSError subclass on unrecognizable image data.
                return "Error: Invalid base64 image format"
        else:
            # Standard image input: already a PIL image from the widget.
            image = input_data

        # BLIP expects 3-channel RGB; normalize palette/RGBA/grayscale inputs.
        image = image.convert("RGB")

        # Generate caption
        inputs = processor(images=image, return_tensors="pt")
        out = model.generate(**inputs)
        caption = processor.decode(out[0], skip_special_tokens=True)
        return caption
    except Exception as e:
        # Top-level boundary: report the error to the UI rather than crash.
        return f"Error processing image: {str(e)}"


# Create the demo with explicit API name
demo = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="pil", label="Upload Image")
    ],
    outputs=gr.Textbox(label="Image Caption"),
    title="Image Captioning",
    description="Upload an image to get a caption",
    examples=[],
    flagging_mode="never",  # Using flagging_mode instead of allow_flagging
    api_name="predict"  # Explicitly set the API name
)

# Launch with queue instead of enable_queue
demo.queue().launch(share=True)