dlaima's picture
Update app.py
da984d3 verified
raw
history blame
2.75 kB
from dotenv import load_dotenv, find_dotenv
import os
import io
from io import BytesIO
from PIL import Image
import base64
import requests
import json
import warnings
import gradio as gr
# Ignore any warning whose message matches the `max_length` default notice.
warnings.filterwarnings(
    action="ignore",
    message=".*Using the model-agnostic default `max_length`.*",
)

# Pull settings from the nearest .env file into the process environment,
# then read the API token and the image-to-text endpoint URL from it.
# Either value is None when its variable is not set.
load_dotenv(dotenv_path=find_dotenv())
hf_api_key = os.environ.get('HF_API_KEY')
endpoint_url = os.environ.get('HF_API_ITT_BASE')
# Helper function for image-to-text API
def get_completion(image, parameters=None, endpoint_url=endpoint_url, timeout=60):
    """Send an image to the image-to-text endpoint and return its JSON reply.

    The image is JPEG-encoded, base64-wrapped and POSTed as
    ``{"inputs": {"image": <base64>}}``, with ``"parameters"`` added when
    *parameters* is given.

    Args:
        image: A PIL image to caption.
        parameters: Optional value sent under the ``"parameters"`` key.
        endpoint_url: URL to POST to; defaults to the module-level
            ``endpoint_url``.
        timeout: Seconds to wait for the endpoint before ``requests`` gives
            up, so a stalled endpoint cannot block the caller forever.

    Returns:
        A dict. When the endpoint answers with a dict it is returned as is;
        when it answers with a non-empty list whose first item is a dict,
        that first item is returned. Every other outcome (missing URL,
        non-200 status, undecodable or unexpected body) yields
        ``{"error": <message>}``.

    Raises:
        requests.RequestException: On connection failures and timeouts.
    """
    if not endpoint_url:
        # Fail with a readable message instead of requests' "Invalid URL 'None'".
        return {"error": "Image-to-text endpoint URL is not configured."}

    headers = {"Content-Type": "application/json"}
    if hf_api_key:
        # Only authenticate when a token exists; never send "Bearer None".
        headers["Authorization"] = f"Bearer {hf_api_key}"

    # JPEG cannot store alpha or palette images, so normalise those first.
    if image.mode not in ("RGB", "L", "CMYK"):
        image = image.convert("RGB")

    # Convert image to base64 format
    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")

    data = {"inputs": {"image": image_base64}}
    if parameters is not None:
        data["parameters"] = parameters

    response = requests.post(
        endpoint_url,
        headers=headers,
        data=json.dumps(data),
        timeout=timeout,
    )
    if response.status_code != 200:
        return {"error": response.text}

    try:
        response_data = json.loads(response.content.decode("utf-8"))
    except ValueError:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {"error": "Failed to decode API response"}

    if isinstance(response_data, dict):
        return response_data
    # Callers treat the result as a dict, so only unwrap a list whose first
    # element really is one.
    if (
        isinstance(response_data, list)
        and response_data
        and isinstance(response_data[0], dict)
    ):
        return response_data[0]
    return {"error": "Unexpected response format"}
# Helper function to download and process the image from a URL
def caption_image(image_url):
    """Download the image at *image_url* and return a caption for it.

    Args:
        image_url: HTTP(S) URL of the image, as entered by the user.

    Returns:
        The ``"generated_text"`` value from the captioning API, or
        ``"No caption generated."`` when that key is absent. On any failure
        a string starting with ``"Error"`` is returned instead; errors raised
        while downloading, decoding or captioning are reported as text
        rather than propagated, because this function is the UI callback.
    """
    url = (image_url or "").strip()
    if not url:
        return "Error processing image: no image URL provided."
    if not url.lower().startswith(("http://", "https://")):
        return "Error processing image: the URL must start with http:// or https://."

    try:
        # NOTE(security): the URL is user-supplied and fetched from the
        # server, so it can reach hosts the server can reach. Restrict
        # allowed hosts if this app is deployed next to internal services.
        # The timeout keeps an unresponsive host from hanging the request.
        response = requests.get(url, timeout=15)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content)).convert("RGB")

        # Get caption from API
        caption_response = get_completion(image)

        # Handle API response
        if "error" in caption_response:
            return f"Error: {caption_response['error']}"
        return caption_response.get("generated_text", "No caption generated.")
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so every failure is
        # turned into a message the user can read.
        return f"Error processing image: {str(e)}"
# Gradio interface
# Single text input (an image URL) mapped to a single text output (the
# caption). The description must match that: there is no upload widget and
# no example gallery in this interface.
demo = gr.Interface(
    fn=caption_image,
    inputs=gr.Textbox(label="Image URL"),
    outputs="text",
    title="Image Captioning App",
    description=(
        "Paste the URL of an image to generate a caption. "
        "This app uses a Hugging Face Inference Endpoint for the `Salesforce/blip-image-captioning-base` model."
    ),
)

# Start the web UI only when the file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()