jatinteamoxio committed on
Commit
de09b8d
·
verified ·
1 Parent(s): 4bebfad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -40
app.py CHANGED
@@ -8,25 +8,26 @@ import io
8
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
9
  model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
10
 
11
- # Captioning function for direct image input
12
- def caption_image(image):
13
- inputs = processor(images=image, return_tensors="pt")
14
- out = model.generate(**inputs)
15
- caption = processor.decode(out[0], skip_special_tokens=True)
16
- return caption
17
-
18
- # API endpoint function that can handle base64 images
19
- def api_caption_image(base64_img):
20
  try:
21
- # Remove the data URL prefix if present
22
- if "," in base64_img:
23
- base64_img = base64_img.split(",")[1]
24
-
25
- # Decode base64 to image
26
- image_bytes = base64.b64decode(base64_img)
27
- image = Image.open(io.BytesIO(image_bytes))
28
-
29
- # Process with model
 
 
 
 
 
 
 
 
30
  inputs = processor(images=image, return_tensors="pt")
31
  out = model.generate(**inputs)
32
  caption = processor.decode(out[0], skip_special_tokens=True)
@@ -34,26 +35,18 @@ def api_caption_image(base64_img):
34
  except Exception as e:
35
  return f"Error processing image: {str(e)}"
36
 
37
- # Create Blocks for more flexibility
38
- with gr.Blocks() as demo:
39
- with gr.Tab("Image Captioning"):
40
- gr.Interface(
41
- fn=caption_image,
42
- inputs=gr.Image(type="pil"),
43
- outputs="text",
44
- title="Explain this Image",
45
- flagging_mode="never",
46
- )
47
-
48
- # Define the API endpoint explicitly
49
- gr.Interface(
50
- fn=api_caption_image,
51
- inputs=gr.Textbox(), # For base64 input
52
- outputs="text",
53
- title="API Endpoint",
54
- flagging_mode="never",
55
- api_name="predict" # This is the API endpoint name
56
- )
57
 
58
- # Launch with queue and API open
59
- demo.queue(api_open=True).launch(share=True)
 
8
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
9
  model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
10
 
11
+ # Function to process both standard images and base64 strings
12
+ def process_image(input_data):
 
 
 
 
 
 
 
13
  try:
14
+ # Check if input is a base64 string
15
+ if isinstance(input_data, str) and input_data.startswith("data:image"):
16
+ # Extract the base64 part
17
+ base64_data = input_data.split(",")[1]
18
+ image_bytes = base64.b64decode(base64_data)
19
+ image = Image.open(io.BytesIO(image_bytes))
20
+ elif isinstance(input_data, str) and len(input_data) > 100: # Likely a base64 string without prefix
21
+ try:
22
+ image_bytes = base64.b64decode(input_data)
23
+ image = Image.open(io.BytesIO(image_bytes))
24
+ except:
25
+ return "Error: Invalid base64 image format"
26
+ else:
27
+ # Standard image input
28
+ image = input_data
29
+
30
+ # Generate caption
31
  inputs = processor(images=image, return_tensors="pt")
32
  out = model.generate(**inputs)
33
  caption = processor.decode(out[0], skip_special_tokens=True)
 
35
  except Exception as e:
36
  return f"Error processing image: {str(e)}"
37
 
38
+ # Create the demo with both direct image upload and API endpoint
39
+ demo = gr.Interface(
40
+ fn=process_image,
41
+ inputs=[
42
+ gr.Image(type="pil", label="Upload Image")
43
+ ],
44
+ outputs=gr.Textbox(label="Image Caption"),
45
+ title="Image Captioning",
46
+ description="Upload an image to get a caption",
47
+ examples=[],
48
+ allow_flagging="never"
49
+ )
 
 
 
 
 
 
 
 
50
 
51
+ # Important: Expose the same function for API usage
52
+ demo.launch(share=True)