| from dotenv import load_dotenv, find_dotenv |
| import os |
| import io |
| from io import BytesIO |
| from PIL import Image |
| import base64 |
| import requests |
| import json |
| import warnings |
| import gradio as gr |
|
|
| |
# Suppress warnings whose message mentions the model-agnostic default
# `max_length`; they are noise for this app.
warnings.filterwarnings(
    "ignore",
    message=".*Using the model-agnostic default `max_length`.*",
)

# Load variables from the .env file located by find_dotenv() into the process
# environment, then read the API credentials and endpoint from it.
_dotenv_path = find_dotenv()
load_dotenv(_dotenv_path)

# Both values are None when the corresponding variable is not set.
hf_api_key = os.environ.get('HF_API_KEY')
endpoint_url = os.environ.get('HF_API_ITT_BASE')
|
|
| |
|
|
def get_completion(image, parameters=None, endpoint_url=endpoint_url, timeout=30):
    """Send an image to the inference endpoint and return the parsed result.

    The image is JPEG-encoded, base64-wrapped and POSTed as JSON in the form
    ``{"inputs": {"image": <base64>}}`` (plus ``"parameters"`` when given).

    Args:
        image: A PIL image. It is converted to RGB when necessary so that it
            can be written as JPEG.
        parameters: Optional dict forwarded under the ``"parameters"`` key.
        endpoint_url: URL to POST to; defaults to the module-level
            ``endpoint_url`` read from the environment.
        timeout: Seconds to wait for the endpoint before giving up.

    Returns:
        The first element when the endpoint answers with a non-empty JSON
        list, the dict itself when it answers with a JSON object, and
        otherwise a dict of the form ``{"error": <message>}``. Failures
        (missing URL, network error, non-200 status, undecodable body) are
        reported through the ``"error"`` dict rather than raised.
    """
    if not endpoint_url:
        # Without this guard requests would raise on a None/empty URL.
        return {"error": "No inference endpoint URL configured"}

    headers = {
        "Authorization": f"Bearer {hf_api_key}",
        "Content-Type": "application/json",
    }

    # The JPEG writer rejects modes such as RGBA or P, so normalise first.
    if image.mode != "RGB":
        image = image.convert("RGB")
    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")

    data = {"inputs": {"image": image_base64}}
    if parameters is not None:
        data["parameters"] = parameters

    try:
        # A timeout keeps a stalled endpoint from blocking the app forever.
        response = requests.post(
            endpoint_url,
            headers=headers,
            data=json.dumps(data),
            timeout=timeout,
        )
    except requests.RequestException as exc:
        return {"error": f"Request to inference endpoint failed: {exc}"}

    if response.status_code != 200:
        return {"error": response.text}

    try:
        response_data = json.loads(response.content.decode("utf-8"))
    except ValueError:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {"error": "Failed to decode API response"}

    if isinstance(response_data, list) and response_data:
        return response_data[0]
    if isinstance(response_data, dict):
        return response_data
    return {"error": "Unexpected response format"}
|
|
|
|
| |
def caption_image(image_url):
    """Download the image at ``image_url`` and return a caption for it.

    Args:
        image_url: URL of the image to caption, as typed by the user.

    Returns:
        The ``"generated_text"`` value from ``get_completion`` on success
        (``"No caption generated."`` when that key is absent), or a
        human-readable string starting with ``"Error"`` on any failure.
        This function never raises; it is the UI callback.
    """
    image_url = (image_url or "").strip()
    if not image_url:
        # Give a clear message instead of an opaque "Invalid URL ''" error.
        return "Error processing image: no image URL provided."

    try:
        # A timeout keeps a slow or unresponsive host from hanging the UI.
        response = requests.get(image_url, timeout=30)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content)).convert("RGB")

        caption_response = get_completion(image)

        # Guard against a non-dict payload: `"error" in <str>` would be a
        # substring test and `.get` would not exist.
        if not isinstance(caption_response, dict):
            return "Error: Unexpected response format"
        if "error" in caption_response:
            return f"Error: {caption_response['error']}"

        return caption_response.get("generated_text", "No caption generated.")
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so every failure is
        # turned into a message shown to the user.
        return f"Error processing image: {str(e)}"
|
|
| |
# Gradio UI: a single textbox takes the image URL and the caption (or an
# error message) returned by caption_image is shown as plain text.
demo = gr.Interface(
    fn=caption_image,
    inputs=gr.Textbox(label="Image URL"),
    outputs="text",
    title="Image Captioning App",
    # The description matches the actual input: a URL textbox, with no file
    # upload and no predefined samples.
    description=(
        "Enter the URL of an image to generate a caption. "
        "This app uses a Hugging Face Inference Endpoint for the `Salesforce/blip-image-captioning-base` model."
    ),
    article="Free images are available on: [https://free-images.com/](https://free-images.com/)",
)


# Start the web server only when the file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|
|
|
|