|
|
import io
import os
import tempfile

import gradio as gr
import requests
from PIL import Image
|
|
|
|
|
class MultimodalImageCreator:
    """Caption an input image and generate prompt-based image variations.

    Both steps go through the hosted Hugging Face Inference API; the API
    token is read from the HF_API_TOKEN environment variable.
    """

    # Seconds to wait for an Inference API response. Hosted models can be
    # slow to cold-start, so keep this generous — but without any timeout
    # a stalled request would hang the app forever.
    REQUEST_TIMEOUT = 120

    def __init__(self):
        """
        Initialize the Multimodal Image Creator

        Uses environment variables for API token

        Raises:
            ValueError: If HF_API_TOKEN is not set.
        """
        self.hf_token = os.environ.get('HF_API_TOKEN')

        if not self.hf_token:
            raise ValueError(
                "Hugging Face API token not found. "
                "Set it in Spaces secrets or as an environment variable."
            )

        # Image -> text model used for captioning.
        self.caption_api_url = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base"

        # Text -> image model used for generating variations.
        self.image_gen_api_url = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2"

        # Default headers: captioning POSTs raw image bytes, hence the
        # octet-stream content type. generate_variations overrides these
        # with a JSON content type for its text prompt.
        self.headers = {
            "Authorization": f"Bearer {self.hf_token}",
            "Content-Type": "application/octet-stream"
        }

    def generate_caption(self, image_path):
        """
        Generate a caption for the input image using Hugging Face API

        Args:
            image_path (str): Path to the input image

        Returns:
            str: Generated image caption, or a human-readable error string
                (this method never raises; the UI displays the string).
        """
        try:
            with open(image_path, "rb") as f:
                data = f.read()

            response = requests.post(
                self.caption_api_url,
                headers=self.headers,
                data=data,
                timeout=self.REQUEST_TIMEOUT
            )

            if response.status_code == 200:
                payload = response.json()
                # Expected shape: [{"generated_text": "..."}]. Guard against
                # empty or non-list payloads instead of letting IndexError
                # collapse into the generic error message below.
                if isinstance(payload, list) and payload:
                    return payload[0].get('generated_text', 'No caption generated')
                return 'No caption generated'
            else:
                return f"Error: {response.status_code} - {response.text}"

        except Exception as e:
            return f"An error occurred: {str(e)}"

    def generate_variations(self, caption, num_variations=3):
        """
        Generate image variations based on the input caption

        Args:
            caption (str): Base caption to generate images from
            num_variations (int): Number of image variations to generate

        Returns:
            list: Generated PIL.Image variations. May be shorter than
                num_variations if individual requests fail; on an
                unexpected exception the images generated so far are
                returned rather than discarded.
        """
        generated_images = []

        try:
            for i in range(num_variations):
                # Nudge the prompt per iteration so the variations differ.
                varied_prompt = f"{caption}, artistic variation {i+1}, high quality"

                response = requests.post(
                    self.image_gen_api_url,
                    headers={
                        "Authorization": f"Bearer {self.hf_token}",
                        "Content-Type": "application/json"
                    },
                    json={"inputs": varied_prompt},
                    timeout=self.REQUEST_TIMEOUT
                )

                if response.status_code == 200:
                    # The API returns the generated image as raw bytes.
                    image = Image.open(io.BytesIO(response.content))
                    generated_images.append(image)
                else:
                    print(f"Error generating variation {i+1}: {response.status_code}")

            return generated_images

        except Exception as e:
            print(f"An error occurred during image generation: {str(e)}")
            # Keep whatever was successfully generated before the failure.
            return generated_images
|
|
|
|
|
def create_gradio_interface():
    """
    Create a Gradio interface for the Multimodal Image Creator

    Returns:
        gr.Blocks: Gradio interface (configured but not launched)
    """
    creator = MultimodalImageCreator()

    def process_image(input_image, num_variations):
        """Caption the uploaded image, then generate variations of it.

        Returns a 4-tuple matching the click() outputs: (original image,
        caption text, gallery images, variation prompt text).
        """
        try:
            if input_image is None:
                return None, "Please upload an image.", [], ""

            # Persist the in-memory numpy image to a unique temp file,
            # since generate_caption reads from a path. A fixed filename
            # would race when two requests run concurrently.
            fd, temp_image_path = tempfile.mkstemp(suffix=".jpg")
            os.close(fd)
            try:
                Image.fromarray(input_image).save(temp_image_path)
                original_caption = creator.generate_caption(temp_image_path)
            finally:
                # Always clean up, even if saving/captioning fails.
                os.remove(temp_image_path)

            generated_images = creator.generate_variations(
                original_caption,
                # Slider values may arrive as float; range() needs an int.
                num_variations=int(num_variations)
            )

            # gr.Textbox expects a string, not a list — one line per image.
            variation_captions = "\n".join(
                f"Variation based on: {original_caption}"
                for _ in generated_images
            )

            return input_image, original_caption, generated_images, variation_captions

        except Exception as e:
            return None, f"An error occurred: {str(e)}", [], ""

    with gr.Blocks() as demo:
        gr.Markdown("# Multimodal Image Content Creator")
        gr.Markdown("Upload an image to generate a caption and create variations!")

        with gr.Row():
            with gr.Column():
                input_image = gr.Image(type="numpy", label="Upload Image")
                num_variations = gr.Slider(
                    minimum=1,
                    maximum=5,
                    value=3,
                    step=1,
                    label="Number of Variations"
                )
                submit_btn = gr.Button("Generate Variations")

            with gr.Column():
                original_image_output = gr.Image(label="Original Image")
                original_caption = gr.Textbox(label="Generated Caption")

        variations_gallery = gr.Gallery(label="Image Variations")
        variations_captions = gr.Textbox(label="Variation Prompts")

        submit_btn.click(
            fn=process_image,
            inputs=[input_image, num_variations],
            outputs=[
                original_image_output,
                original_caption,
                variations_gallery,
                variations_captions
            ]
        )

    return demo
|
|
|
|
|
|
|
|
# Build the interface at import time so hosting platforms (e.g. Hugging
# Face Spaces) can discover the module-level `demo` object.
# NOTE(review): this runs MultimodalImageCreator's token check on import,
# so importing this module without HF_API_TOKEN set raises ValueError.
demo = create_gradio_interface()
|
|
|
|
|
|
|
|
# Launch the app only when run directly as a script.
if __name__ == "__main__":
    demo.launch(
        # share=True requests a public tunnel link for local runs;
        # NOTE(review): it has no effect on Spaces — confirm it is wanted.
        share=True,
        # debug=True keeps the process attached and surfaces errors
        # in the console.
        debug=True
    )
|
|
|
|
|
|