dlaima commited on
Commit
d907d8c
·
verified ·
1 Parent(s): 8332de3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -40
app.py CHANGED
@@ -1,72 +1,70 @@
1
  from dotenv import load_dotenv, find_dotenv
2
- load_dotenv(find_dotenv())
3
-
4
- import os # Provides a way of using operating system-dependent functionality
5
- import io # Provides core tools for working with streams of data
6
  from io import BytesIO
7
- import IPython.display # Used for displaying rich content (e.g., images, HTML) in Jupyter Notebooks
8
- from PIL import Image # Python Imaging Library for opening, manipulating, and saving image files
9
- import base64 # Encodes and decodes data in base64 format
10
  import requests
11
  import json
12
- import torch
13
- import torch.nn as nn
14
  import warnings
15
  import gradio as gr
16
 
17
- # Ignore specific UserWarnings related to max_length in transformers
18
  warnings.filterwarnings("ignore", message=".*Using the model-agnostic default `max_length`.*")
19
 
20
  # Load environment variables from .env file
 
21
  hf_api_key = os.getenv('API_TOKEN')
22
  endpoint_url = os.getenv('INFERENCE_ENDPOINT')
23
 
 
24
 
25
- # Set your Inference Endpoint URL and API key
26
- #INFERENCE_ENDPOINT = "https://your-endpoint-url" # Replace with your endpoint URL
27
- #API_TOKEN = "your-api-token" # Replace with your Hugging Face API token
28
-
29
- #Image-to-text endpoint - Helper function
30
- def get_completion(inputs, parameters=None, endpoint_url=endpoint_url):
31
  headers = {
32
  "Authorization": f"Bearer {hf_api_key}",
33
  "Content-Type": "application/json"
34
  }
35
- data = {"inputs": inputs}
 
 
 
 
 
36
  if parameters is not None:
37
  data.update({"parameters": parameters})
 
38
  response = requests.post(endpoint_url, headers=headers, data=json.dumps(data))
39
- return json.loads(response.content.decode("utf-8"))
40
 
41
- def get_generation(model, processor, image, dtype):
42
- inputs = processor(image, return_tensors="pt").to(dtype)
43
- out = model.generate(**inputs)
44
- return processor.decode(out[0], skip_special_tokens=True)
45
 
46
- def load_image(img_url):
47
- image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
48
- return image
49
-
50
- #Gradio interface
51
  def caption_image(image_url):
52
- # Download the image from the URL
53
- response = requests.get(image_url)
54
- response.raise_for_status() # Ensure the request was successful
55
- image = Image.open(BytesIO(response.content)) # Load image with PIL
56
-
57
- # Call your captioning function here (replace `get_completion` with the actual implementation)
58
- #caption = get_completion(image)
59
- caption = get_completion(image_url)
60
- return caption
61
 
62
- # Gradio interface
 
 
 
 
63
 
 
 
 
 
64
  demo = gr.Interface(
65
  fn=caption_image,
66
- inputs=gr.Textbox(label="Image URL"), # Input as a URL
67
  outputs="text",
68
- #examples=[Image1, Image2, Image3],
69
- #examples=[image],
70
  title="Image Captioning App",
71
  description=(
72
  "Upload an image or use one of the predefined samples to generate a caption. "
@@ -76,3 +74,4 @@ demo = gr.Interface(
76
 
77
  if __name__ == "__main__":
78
  demo.launch()
 
 
1
  from dotenv import load_dotenv, find_dotenv
2
+ import os
3
+ import io
 
 
4
  from io import BytesIO
5
+ from PIL import Image
6
+ import base64
 
7
  import requests
8
  import json
 
 
9
  import warnings
10
  import gradio as gr
11
 
12
+ # Suppress specific warnings
13
  warnings.filterwarnings("ignore", message=".*Using the model-agnostic default `max_length`.*")
14
 
15
  # Load environment variables from .env file
16
+ load_dotenv(find_dotenv())
17
  hf_api_key = os.getenv('API_TOKEN')
18
  endpoint_url = os.getenv('INFERENCE_ENDPOINT')
19
 
20
# Helper function for image-to-text API
def get_completion(image, parameters=None, endpoint_url=endpoint_url):
    """Send a PIL image to the inference endpoint and return the parsed JSON reply.

    Parameters
    ----------
    image : PIL.Image.Image
        Image to caption. Converted to RGB if needed so JPEG encoding succeeds.
    parameters : dict, optional
        Extra generation parameters forwarded under the ``"parameters"`` key.
    endpoint_url : str
        Inference endpoint URL (defaults to the module-level INFERENCE_ENDPOINT).

    Returns
    -------
    dict
        Parsed JSON response on success, or ``{"error": <text>}`` on HTTP failure.
    """
    headers = {
        "Authorization": f"Bearer {hf_api_key}",
        "Content-Type": "application/json"
    }

    # JPEG has no alpha channel: RGBA/P/LA images make .save() raise, so
    # normalize to RGB first (no-op for images that are already RGB).
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Convert image to base64 format so it can travel inside a JSON payload.
    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")

    data = {"inputs": {"image": image_base64}}
    if parameters is not None:
        data.update({"parameters": parameters})

    # Bounded timeout so a stalled endpoint cannot hang the Gradio worker forever.
    response = requests.post(
        endpoint_url, headers=headers, data=json.dumps(data), timeout=60
    )

    # Surface HTTP errors to the caller as data instead of raising.
    if response.status_code != 200:
        return {"error": response.text}

    return json.loads(response.content.decode("utf-8"))
43
+
44
# Helper function to download and process the image from a URL

def caption_image(image_url):
    """Download an image from *image_url* and return a generated caption string.

    Every failure (bad URL, HTTP error, undecodable image, API error) is
    reported as a human-readable error string rather than raised, because the
    return value is rendered directly in the Gradio text output.
    """
    try:
        # Bounded timeout so a dead host cannot hang the UI request.
        response = requests.get(image_url, timeout=30)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content)).convert("RGB")

        # Get caption from API
        caption_response = get_completion(image)

        # Hugging Face inference endpoints may return either a dict or a list
        # of dicts ([{"generated_text": ...}]); normalize to a dict so the
        # lookups below cannot raise AttributeError on a list.
        if isinstance(caption_response, list):
            caption_response = caption_response[0] if caption_response else {}

        # Handle API response
        if "error" in caption_response:
            return f"Error: {caption_response['error']}"

        return caption_response.get("generated_text", "No caption generated.")

    except Exception as e:
        # Broad catch is deliberate: every failure becomes a UI message.
        return f"Error processing image: {str(e)}"
62
+
63
+ # Gradio interface
64
  demo = gr.Interface(
65
  fn=caption_image,
66
+ inputs=gr.Textbox(label="Image URL"),
67
  outputs="text",
 
 
68
  title="Image Captioning App",
69
  description=(
70
  "Upload an image or use one of the predefined samples to generate a caption. "
 
74
 
75
  if __name__ == "__main__":
76
  demo.launch()
77
+