import io
import os

import gradio as gr
import requests
from PIL import Image

# Inference Endpoint URL and API token.
# Both are read from the environment so the secret token does not have to be
# committed to source control; the placeholders below are only used when the
# corresponding variable is unset or empty.
INFERENCE_ENDPOINT = (
    os.environ.get("HF_INFERENCE_ENDPOINT") or "https://your-endpoint-url"
)  # Set HF_INFERENCE_ENDPOINT or replace with your endpoint URL
API_TOKEN = (
    os.environ.get("HF_TOKEN") or "your-api-token"
)  # Set HF_TOKEN or replace with your Hugging Face API token

def generate_caption(image):
    """
    Send an image to the Hugging Face Inference Endpoint for caption generation.

    The image is encoded as PNG and posted as the raw request body. Every
    failure mode (missing image, network error, non-200 status, unexpected
    response body) is reported as a returned string rather than raised, so
    the message can be shown directly in the output textbox.

    :param image: An image in PIL format, or ``None`` when nothing was uploaded.
    :return: Generated caption or error message.
    """
    # Gradio passes None when the user submits without selecting an image.
    if image is None:
        return "Error: no image provided."

    # requests cannot serialise a PIL Image object; encode it to bytes first.
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")

    headers = {
        "Authorization": f"Bearer {API_TOKEN}",
        # The endpoint is expected to decode the body based on this header.
        "Content-Type": "image/png",
    }

    try:
        response = requests.post(
            INFERENCE_ENDPOINT,
            headers=headers,
            data=buffer.getvalue(),
            timeout=60,  # never hang the UI forever on an unresponsive endpoint
        )
    except requests.RequestException as exc:
        return f"Error: request failed - {exc}"

    if response.status_code != 200:
        return f"Error: {response.status_code} - {response.text}"

    try:
        payload = response.json()
    except ValueError:
        return f"Error: endpoint returned a non-JSON response - {response.text}"

    # Image-to-text endpoints usually answer with [{"generated_text": ...}];
    # a bare dict is accepted as well.
    if isinstance(payload, list):
        payload = payload[0] if payload else {}
    if not isinstance(payload, dict):
        return "No caption generated."
    return payload.get("generated_text", "No caption generated.")


# Example images offered below the upload widget.
# They are passed to Gradio as URLs: PIL's Image.open() only accepts local
# paths or file objects, so opening these URLs directly fails at import time.
EXAMPLE_IMAGE_URLS = [
    "https://huggingface.co/spaces/dlaima/Multiple_Image_captioning/resolve/main/image1.jpg",
    "https://huggingface.co/spaces/dlaima/Multiple_Image_captioning/resolve/main/image2.jpeg",
    "https://huggingface.co/spaces/dlaima/Multiple_Image_captioning/resolve/main/image3.jpeg",
]


# Gradio interface: one image input, one text output, captioned by
# generate_caption.
demo = gr.Interface(
    fn=generate_caption,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.Textbox(label="Generated Caption"),
    examples=EXAMPLE_IMAGE_URLS,
    title="Image Captioning App",
    description=(
        "Upload an image or use one of the predefined samples to generate a caption. "
        "This app uses a Hugging Face Inference Endpoint for the `Salesforce/blip-image-captioning-base` model."
    ),
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()