# ziadmostafa's picture
# debugging
# 71e69ad
import io
import os
import tempfile

import gradio as gr
import requests
from PIL import Image
class MultimodalImageCreator:
    """
    Caption an image and generate new images from that caption.

    Both steps are HTTP calls to the Hugging Face Inference API: one model
    endpoint for image captioning and one for text-to-image generation.
    """

    def __init__(self, request_timeout=120):
        """
        Initialize the Multimodal Image Creator
        Uses environment variables for API token

        Args:
            request_timeout (float): Seconds to wait on each API request
                before giving up. Without a timeout a stalled request
                would block the caller indefinitely.

        Raises:
            ValueError: If the HF_API_TOKEN environment variable is unset
                or empty.
        """
        # Retrieve API token from environment variable
        self.hf_token = os.environ.get('HF_API_TOKEN')
        if not self.hf_token:
            raise ValueError(
                "Hugging Face API token not found. "
                "Set it in Spaces secrets or as an environment variable."
            )

        # Applied to every outgoing request made by this instance
        self.request_timeout = request_timeout

        # Image Captioning API Endpoint
        self.caption_api_url = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base"

        # Text-to-Image API Endpoint
        self.image_gen_api_url = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2"

        # Headers for requests whose body is raw image bytes (captioning)
        self.headers = {
            "Authorization": f"Bearer {self.hf_token}",
            "Content-Type": "application/octet-stream"
        }

    def generate_caption(self, image_path):
        """
        Generate a caption for the input image using Hugging Face API

        Failures are reported through the return value rather than raised:
        a non-200 response yields a string starting with "Error: " and any
        exception yields a string starting with "An error occurred: ".

        Args:
            image_path (str): Path to the input image

        Returns:
            str: Generated image caption, or an error description
        """
        try:
            # Read the image file
            with open(image_path, "rb") as f:
                data = f.read()

            # Make API request; the timeout keeps a stalled call from hanging
            response = requests.post(
                self.caption_api_url,
                headers=self.headers,
                data=data,
                timeout=self.request_timeout
            )

            if response.status_code != 200:
                return f"Error: {response.status_code} - {response.text}"

            # The caption is read from the first element of the JSON payload
            return response.json()[0].get('generated_text', 'No caption generated')
        except Exception as e:
            # Deliberately broad: this method's contract is to hand back a
            # displayable message instead of raising.
            return f"An error occurred: {str(e)}"

    def generate_variations(self, caption, num_variations=3):
        """
        Generate image variations based on the input caption

        One request is made per variation, each with a slightly different
        prompt. A non-200 response skips that variation. If an exception
        occurs, generation stops and the images produced so far are
        returned instead of being thrown away.

        Args:
            caption (str): Base caption to generate images from
            num_variations (int): Number of image variations to generate.
                Integral floats (e.g. 3.0) are accepted and truncated.

        Returns:
            list: Generated image variations (PIL images); may hold fewer
                than num_variations entries, or be empty
        """
        generated_images = []
        request_headers = {
            "Authorization": f"Bearer {self.hf_token}",
            "Content-Type": "application/json"
        }
        try:
            # range() rejects floats, so coerce the requested count first
            for i in range(int(num_variations)):
                # Create a slightly varied prompt
                varied_prompt = f"{caption}, artistic variation {i+1}, high quality"

                # Make API request
                response = requests.post(
                    self.image_gen_api_url,
                    headers=request_headers,
                    json={"inputs": varied_prompt},
                    timeout=self.request_timeout
                )

                if response.status_code == 200:
                    # The response body is the encoded image itself
                    image = Image.open(io.BytesIO(response.content))
                    generated_images.append(image)
                else:
                    print(f"Error generating variation {i+1}: {response.status_code}")
        except Exception as e:
            print(f"An error occurred during image generation: {str(e)}")
        # Reached on success and after an exception alike, so partial
        # results survive a mid-run failure.
        return generated_images
def create_gradio_interface():
    """
    Create a Gradio interface for the Multimodal Image Creator

    The page has an image upload, a slider for the number of variations
    and a button. Pressing the button captions the uploaded image and
    generates new images from that caption.

    Returns:
        gr.Blocks: Gradio interface

    Raises:
        Any exception raised by MultimodalImageCreator() propagates to
        the caller.
    """
    # Initialize the multimodal image creator
    creator = MultimodalImageCreator()

    # generate_caption reports failures as text with these prefixes; such
    # text must not be forwarded to the image model as a prompt.
    caption_error_prefixes = ("Error: ", "An error occurred: ")

    def process_image(input_image, num_variations):
        """
        Caption the uploaded image and build variations from the caption.

        Args:
            input_image: Image array delivered by the upload component, or
                None when nothing was uploaded
            num_variations: Slider value; coerced to int before use

        Returns:
            tuple: (original image, caption or error text, list of
                generated images, newline-separated variation captions)
        """
        # Validate input
        if input_image is None:
            return None, "Please upload an image.", [], ""

        temp_image_path = None
        try:
            # A unique temporary file per call, so simultaneous requests
            # cannot overwrite each other's upload.
            fd, temp_image_path = tempfile.mkstemp(suffix=".jpg")
            os.close(fd)
            Image.fromarray(input_image).save(temp_image_path)

            # Generate caption
            original_caption = creator.generate_caption(temp_image_path)
            if original_caption.startswith(caption_error_prefixes):
                # Captioning failed: show the message, skip image generation
                return input_image, original_caption, [], ""

            # Create variations
            generated_images = creator.generate_variations(
                original_caption,
                num_variations=int(num_variations)
            )

            # The captions go to a Textbox, so emit one line per image
            # rather than a Python list.
            variation_captions = "\n".join(
                f"Variation based on: {original_caption}"
                for _ in generated_images
            )
            return input_image, original_caption, generated_images, variation_captions
        except Exception as e:
            # UI boundary: surface the failure in the caption box
            return None, f"An error occurred: {str(e)}", [], ""
        finally:
            # Clean up the temporary file on every path, including errors
            if temp_image_path is not None:
                try:
                    os.remove(temp_image_path)
                except OSError:
                    # Best-effort cleanup; must not replace the real result
                    pass

    # Create Gradio Interface
    with gr.Blocks() as demo:
        gr.Markdown("# Multimodal Image Content Creator")
        gr.Markdown("Upload an image to generate a caption and create variations!")
        with gr.Row():
            # Input components
            with gr.Column():
                input_image = gr.Image(type="numpy", label="Upload Image")
                num_variations = gr.Slider(
                    minimum=1,
                    maximum=5,
                    value=3,
                    step=1,
                    label="Number of Variations"
                )
                submit_btn = gr.Button("Generate Variations")
            # Output components
            with gr.Column():
                # Original image and caption
                original_image_output = gr.Image(label="Original Image")
                original_caption = gr.Textbox(label="Generated Caption")
                # Variations gallery
                variations_gallery = gr.Gallery(label="Image Variations")
                variations_captions = gr.Textbox(label="Variation Prompts")

        # Set up the processing
        submit_btn.click(
            fn=process_image,
            inputs=[input_image, num_variations],
            outputs=[
                original_image_output,
                original_caption,
                variations_gallery,
                variations_captions
            ]
        )
    return demo
# Build the interface at module level so `demo` exists as a module attribute
demo = create_gradio_interface()

# Start the server only when this file is executed directly
if __name__ == "__main__":
    demo.launch(share=True, debug=True)