dlaima commited on
Commit
46f4a73
·
verified ·
1 Parent(s): 800fd2d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -19
app.py CHANGED
@@ -7,26 +7,27 @@ import os
7
  INFERENCE_ENDPOINT = "https://your-endpoint-url" # Replace with your endpoint URL
8
  API_TOKEN = "your-api-token" # Replace with your Hugging Face API token
9
 
10
- def generate_caption(image):
11
- """
12
- Sends an image to the Hugging Face Inference Endpoint for caption generation.
13
- :param image: An image in PIL format.
14
- :return: Generated caption or error message.
15
- """
16
- headers = {"Authorization": f"Bearer {API_TOKEN}"}
17
- files = {"inputs": image}
18
- response = requests.post(INFERENCE_ENDPOINT, headers=headers, files=files)
19
-
20
- if response.status_code == 200:
21
- return response.json().get("generated_text", "No caption generated.")
22
- else:
23
- return f"Error: {response.status_code} - {response.text}"
 
 
24
 
25
  def load_image(img_url):
26
  image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
27
  return image
28
 
29
-
30
  #Open the images
31
  #Image1=Image.open('dlaima/Multiple_Image_captioning/main/image1.jpg')
32
  #Image2=Image.open('https://huggingface.co/spaces/dlaima/Multiple_Image_captioning/resolve/main/image2.jpeg')
@@ -35,14 +36,27 @@ def load_image(img_url):
35
  image_url = 'https://free-images.com/lg/9e46/white_bengal_tiger_tiger_0.jpg'
36
  image = load_image(image_url)
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  # Gradio interface
39
 
40
  demo = gr.Interface(
41
- fn=generate_caption,
42
- inputs=gr.Image(type="pil", label="Upload Image"),
43
- outputs=gr.Textbox(label="Generated Caption"),
44
  #examples=[Image1, Image2, Image3],
45
- examples=[image],
46
  title="Image Captioning App",
47
  description=(
48
  "Upload an image or use one of the predefined samples to generate a caption. "
 
7
  INFERENCE_ENDPOINT = "https://your-endpoint-url" # Replace with your endpoint URL
8
  API_TOKEN = "your-api-token" # Replace with your Hugging Face API token
9
 
10
# Image-to-text endpoint
def get_completion(inputs, parameters=None, endpoint_url=INFERENCE_ENDPOINT):
    """Send an inference request to the image-to-text HTTP endpoint.

    :param inputs: Payload for the model (here: an image URL string).
    :param parameters: Optional dict of generation parameters, forwarded
        under the ``"parameters"`` key when provided.
    :param endpoint_url: Endpoint to POST to. The original default was
        ``endpoint_url=endpoint_url``, which references an undefined name
        and raises NameError the moment the ``def`` statement executes;
        defaulting to INFERENCE_ENDPOINT fixes that without changing what
        existing callers get.
    :return: The endpoint's JSON response, decoded into Python objects.
    """
    headers = {
        "Authorization": f"Bearer {API_TOKEN}",
        "Content-Type": "application/json",
    }
    data = {"inputs": inputs}
    if parameters is not None:
        data.update({"parameters": parameters})
    # Post to endpoint_url (the original ignored the parameter and always
    # used INFERENCE_ENDPOINT; the default preserves that behavior).
    response = requests.post(endpoint_url, headers=headers, data=json.dumps(data))
    return json.loads(response.content.decode("utf-8"))
21
+
22
def get_generation(model, processor, image, dtype):
    """Run caption generation locally with a model/processor pair.

    :param model: A loaded captioning model exposing ``generate``.
    :param processor: Matching processor that tensorizes the image and
        decodes generated token ids.
    :param image: Input image accepted by the processor.
    :param dtype: Dtype/device target the input tensors are cast to.
    :return: The decoded caption string.
    """
    encoded = processor(image, return_tensors="pt").to(dtype)
    token_ids = model.generate(**encoded)
    caption = processor.decode(token_ids[0], skip_special_tokens=True)
    return caption
26
 
27
def load_image(img_url):
    """Fetch an image over HTTP and return it as an RGB PIL image.

    :param img_url: URL of the image to download.
    :return: ``PIL.Image.Image`` converted to RGB mode.
    """
    response = requests.get(img_url, stream=True)
    pil_image = Image.open(response.raw)
    return pil_image.convert('RGB')
30
 
 
31
  #Open the images
32
  #Image1=Image.open('dlaima/Multiple_Image_captioning/main/image1.jpg')
33
  #Image2=Image.open('https://huggingface.co/spaces/dlaima/Multiple_Image_captioning/resolve/main/image2.jpeg')
 
36
  image_url = 'https://free-images.com/lg/9e46/white_bengal_tiger_tiger_0.jpg'
37
  image = load_image(image_url)
38
 
39
+
40
def caption_image(image_url):
    """Generate a caption for the image at *image_url*.

    Note that the URL string itself — not the image bytes — is what gets
    forwarded to the inference endpoint via ``get_completion``. The image
    is downloaded first only to fail fast on a bad URL or non-image
    content before hitting the endpoint.

    :param image_url: Publicly reachable URL of an image.
    :return: Caption response from the inference endpoint.
    :raises requests.HTTPError: If the URL cannot be fetched successfully.
    :raises PIL.UnidentifiedImageError: If the content is not an image.
    """
    response = requests.get(image_url)
    response.raise_for_status()  # fail fast on an unreachable URL
    # Validate that the payload really is an image. The original bound
    # this to an unused `image` variable, which obscured the fact that
    # only the URL string is sent to the endpoint below.
    Image.open(BytesIO(response.content))
    return get_completion(image_url)
50
+
51
+
52
  # Gradio interface
53
 
54
  demo = gr.Interface(
55
+ fn=caption_image,
56
+ inputs=gr.Textbox(label="Image URL"), # Input as a URL
57
+ outputs="text",
58
  #examples=[Image1, Image2, Image3],
59
+ #examples=[image],
60
  title="Image Captioning App",
61
  description=(
62
  "Upload an image or use one of the predefined samples to generate a caption. "