# NOTE: removed non-Python artifacts (file size, git blame hashes, and a
# line-number gutter) left over from copying this file out of a code viewer.
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())  # load .env into the process environment before os.getenv calls below
import os  # Read configuration from environment variables
import io  # Core tools for working with streams of data
from io import BytesIO
import IPython.display  # Rich content display (images, HTML) in Jupyter notebooks
from PIL import Image  # Python Imaging Library: open, manipulate, and save images
import base64  # Base64 encoding/decoding of payloads
import requests
import json
import torch
import torch.nn as nn
import warnings
import gradio as gr
# Silence the transformers UserWarning about falling back to the
# model-agnostic default `max_length` during generation.
warnings.filterwarnings("ignore", message=".*Using the model-agnostic default `max_length`.*")
# Credentials/endpoint come from the environment (populated by load_dotenv above).
hf_api_key = os.getenv('API_TOKEN')
endpoint_url = os.getenv('INFERENCE_ENDPOINT')
# Example .env values:
#INFERENCE_ENDPOINT = "https://your-endpoint-url" # Replace with your endpoint URL
#API_TOKEN = "your-api-token" # Replace with your Hugging Face API token
#Image-to-text endpoint - Helper funcion
def get_completion(inputs, parameters=None, endpoint_url=endpoint_url):
    """POST *inputs* to the Hugging Face Inference Endpoint and return the parsed JSON.

    Args:
        inputs: Payload for the model (e.g. an image URL or base64-encoded image).
        parameters: Optional dict of generation parameters forwarded to the endpoint.
        endpoint_url: Endpoint URL; defaults to the INFERENCE_ENDPOINT env var.

    Returns:
        The endpoint's decoded JSON response.

    Raises:
        requests.HTTPError: if the endpoint returns a non-2xx status.
        requests.Timeout: if the request does not complete within 30 seconds.
    """
    headers = {
        "Authorization": f"Bearer {hf_api_key}",
        "Content-Type": "application/json"
    }
    data = {"inputs": inputs}
    if parameters is not None:
        data.update({"parameters": parameters})
    # timeout prevents an unresponsive endpoint from hanging the app forever;
    # raise_for_status surfaces HTTP errors instead of handing callers an
    # error payload disguised as a result.
    response = requests.post(endpoint_url, headers=headers,
                             data=json.dumps(data), timeout=30)
    response.raise_for_status()
    return response.json()
def get_generation(model, processor, image, dtype):
    """Caption *image* locally with a transformers model/processor pair.

    The processor encodes the image into tensors (cast to *dtype*), the model
    generates token ids, and the processor decodes the first sequence to text.
    """
    encoded = processor(image, return_tensors="pt")
    encoded = encoded.to(dtype)
    generated_ids = model.generate(**encoded)
    first_sequence = generated_ids[0]
    return processor.decode(first_sequence, skip_special_tokens=True)
def load_image(img_url):
    """Download the image at *img_url* and return it as an RGB PIL Image.

    Raises:
        requests.HTTPError: if the server returns a non-2xx status.
        requests.Timeout: if the download exceeds 30 seconds.
    """
    response = requests.get(img_url, timeout=30)
    # Fail here with a clear HTTP error rather than letting PIL choke on an
    # HTML error page.
    response.raise_for_status()
    # BytesIO(response.content) instead of response.raw: .raw bypasses
    # transfer decoding (e.g. gzip) and only works with stream=True.
    image = Image.open(BytesIO(response.content)).convert('RGB')
    return image
#Gradio interface
def caption_image(image_url):
    """Gradio handler: caption the image at *image_url* via the inference endpoint.

    Args:
        image_url: Publicly reachable URL of the image to caption.

    Returns:
        The generated caption text, or the raw endpoint response rendered as a
        string when it does not have the expected shape.

    Raises:
        requests.HTTPError: if the image URL is unreachable or returns an error.
    """
    # Validate the URL up front so the user sees a clear download error
    # instead of an opaque endpoint failure.
    response = requests.get(image_url, timeout=30)
    response.raise_for_status()
    Image.open(BytesIO(response.content))  # confirm the payload really is an image
    result = get_completion(image_url)
    # The image-captioning task typically responds with
    # [{"generated_text": "..."}] — TODO confirm against the deployed endpoint.
    # Fall back to the stringified payload for any other shape so the
    # Gradio "text" output always gets a string.
    if isinstance(result, list) and result and isinstance(result[0], dict):
        return result[0].get("generated_text", str(result))
    return str(result)
# Gradio interface: the user supplies an image URL and the app returns a caption.
demo = gr.Interface(
    fn=caption_image,
    inputs=gr.Textbox(label="Image URL"),  # URL input, not a file upload
    outputs="text",
    title="Image Captioning App",
    # Description matches the actual UI: there is no upload widget and no
    # predefined samples are wired up.
    description=(
        "Enter the URL of an image to generate a caption. "
        "This app uses a Hugging Face Inference Endpoint for the "
        "`Salesforce/blip-image-captioning-base` model."
    ),
)

if __name__ == "__main__":
    demo.launch()