# Varhal's picture
# Update app.py
# 63e554e verified
import json
import os
import time
import uuid
import tempfile
from PIL import Image, ImageDraw, ImageFont
import gradio as gr
import base64
import mimetypes
from google import genai
from google.genai import types
# Helper function to save binary data
def save_binary_file(file_name, data):
    """Write the raw bytes in ``data`` to ``file_name``, replacing any existing file."""
    with open(file_name, "wb") as output_stream:
        output_stream.write(data)
# Function to get tags from an image using Gemini
def get_image_tags(file_name, text_prompt, model="gemini-2.0-flash-exp"):
    """
    Analyze the image at ``file_name`` with ``text_prompt`` via Gemini and
    return the model's text response (used here to produce JSON tags).

    Parameters:
        file_name: path to a local image file to upload for analysis.
        text_prompt: instruction sent alongside the image (here: "emit JSON tags").
        model: Gemini model identifier to query.

    Returns:
        The concatenated text response, or an error string (this function
        deliberately returns error messages instead of raising, so the caller
        can surface them directly in the UI).
    """
    api_key = os.environ.get("geminigoogle")
    if not api_key:
        # Return a clear message if API key is missing
        return "Error: GEMINI_API_KEY environment variable (geminigoogle) not set for tagging."
    client = genai.Client(api_key=api_key)
    uploaded_files = []  # Keep track of uploaded files for cleanup in finally
    try:
        # Upload the file so it can be referenced by URI in the request
        uploaded_files = [client.files.upload(file=file_name)]
        print(f"Uploaded file for tagging: {uploaded_files[0].uri}")
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=uploaded_files[0].uri,
                        mime_type=uploaded_files[0].mime_type,
                    ),
                    types.Part.from_text(text=text_prompt),
                ],
            ),
        ]
        # Configure for text-only response (focus on getting JSON)
        generate_content_config = types.GenerateContentConfig(
            temperature=0.5,  # Lower temperature might give more focused tags
            top_p=0.95,
            top_k=40,
            max_output_tokens=1024,
            response_modalities=["text"],
            response_mime_type="text/plain",  # Expect plain text
        )
        # Use generate_content for a single (non-streamed) text response
        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=generate_content_config,
        )
        tag_response = ""
        if response and response.candidates and response.candidates[0].content and response.candidates[0].content.parts:
            # Concatenate all text parts from the response.
            # Fix: part.text may be None (and hasattr(part, 'text') is always
            # true on a Part), which would make `+=` raise TypeError; guard
            # with the same getattr-default pattern used in generate().
            for part in response.candidates[0].content.parts:
                text_part = getattr(part, "text", "")
                if text_part:
                    tag_response += text_part
        else:
            tag_response = "Could not generate tags."
        return tag_response
    except Exception as e:
        print(f"Error during tagging API call: {e}")
        return f"Error generating tags: {e}"
    finally:
        # Always delete the uploaded file from the Gemini file store.
        for file in uploaded_files:
            try:
                client.files.delete(name=file.name)
                print(f"Deleted uploaded file after tagging: {file.name}")
            except Exception as cleanup_e:
                print(f"Error deleting uploaded file {file.name}: {cleanup_e}")
# Function for the main image processing call
def generate(text, file_name, model="gemini-2.0-flash-exp"):
    """
    Send the image at ``file_name`` plus the user prompt ``text`` to Gemini
    and stream back the edited result.

    Parameters:
        text: user prompt describing the desired edit.
        file_name: path to a local image file to upload.
        model: Gemini model identifier to query.

    Returns:
        Tuple ``(image_path, text_response)`` — ``image_path`` is the path of
        a temporary PNG holding the generated image, or ``None`` if no image
        data was received; ``text_response`` is the stripped concatenation of
        all text parts from the stream.

    Raises:
        ValueError: when the ``geminigoogle`` environment variable is not set.
        Exception: any API error is re-raised after temp-file cleanup.
    """
    api_key = os.environ.get("geminigoogle")
    if not api_key:
        raise ValueError("GEMINI_API_KEY environment variable (geminigoogle) not set.")
    client = genai.Client(api_key=api_key)
    uploaded_files = []
    temp_output_image_path = None
    try:
        uploaded_files = [client.files.upload(file=file_name)]
        print(f"Uploaded file for generation: {uploaded_files[0].uri}")
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=uploaded_files[0].uri,
                        mime_type=uploaded_files[0].mime_type,
                    ),
                    types.Part.from_text(text=text),
                ],
            ),
        ]
        generate_content_config = types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            response_modalities=["image", "text"],  # Expecting potentially image and text
            response_mime_type="text/plain",
        )
        text_response = ""
        image_path = None
        # Pre-create a temp file to receive any image bytes from the stream.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            temp_output_image_path = tmp.name
        print("Starting generation stream...")
        # Stream the response
        for chunk in client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        ):
            if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
                continue
            for part in chunk.candidates[0].content.parts:
                # Check for text parts (part.text may be None, hence the getattr default)
                text_part = getattr(part, "text", "")
                if text_part:
                    text_response += text_part
                if part.inline_data:
                    print(f"Received image data with mime type {part.inline_data.mime_type}. Saving to {temp_output_image_path}")
                    save_binary_file(temp_output_image_path, part.inline_data.data)
                    image_path = temp_output_image_path  # Set the output image path
        print("Generation stream finished.")
        if not image_path or not os.path.exists(image_path) or os.path.getsize(image_path) == 0:
            print("No valid image data was received or saved.")
            image_path = None
            # Fix: delete the pre-created temp file so it does not leak on
            # disk when the stream produced no (or empty) image data.
            if temp_output_image_path and os.path.exists(temp_output_image_path):
                os.remove(temp_output_image_path)
        return image_path, text_response.strip()
    except Exception as e:
        print(f"Error during main generation API call: {e}")
        if temp_output_image_path and os.path.exists(temp_output_image_path):
            os.remove(temp_output_image_path)
        raise e  # Re-raise the exception after cleanup
    finally:
        # Always delete the uploaded source file from the Gemini file store.
        for file in uploaded_files:
            try:
                client.files.delete(name=file.name)
                print(f"Deleted uploaded file after generation: {file.name}")
            except Exception as cleanup_e:
                print(f"Error deleting uploaded file {file.name}: {cleanup_e}")
# Main processing function for Gradio
def process_image_and_prompt(composite_pil, prompt, enable_tagging=True):
    """
    Gradio click handler: save the uploaded PIL image to a temp file,
    optionally tag it via Gemini, run the main edit request, and return
    results for the UI.

    Parameters:
        composite_pil: PIL.Image uploaded by the user.
        prompt: user's edit instruction for Gemini.
        enable_tagging: when True, prepend a JSON tag string from get_image_tags.

    Returns:
        Tuple ``(image_output_list, final_text_output)`` — a (possibly empty)
        list of PIL images for the gallery, and the combined tags + response text.

    Raises:
        gr.Error: wraps any processing failure so Gradio shows it in the UI.
    """
    composite_path = None  # input temp file path, cleaned up in finally
    output_image_path = None  # output temp file path, cleaned up in finally
    try:
        # 1. Save the input PIL image to a temporary PNG file.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            composite_path = tmp.name
        # PNG supports both RGB and RGBA, so one save call covers all modes
        # (the original had an identical branch for each mode — collapsed).
        composite_pil.save(composite_path, format="PNG")
        file_name = composite_path  # Path to the saved input image file
        model = "gemini-2.0-flash-exp"  # Specify the model here
        # 2. Optionally get a JSON tag string for the original image.
        tag_json_string = ""
        if enable_tagging:
            tagging_prompt = "Analyze this image. Provide a JSON object containing a single key, 'tags', whose value is a JSON array of strings, representing relevant keywords or tags for the image content. Example: {\"tags\": [\"apple\", \"fruit\", \"red\"]}. Provide ONLY the JSON object and nothing else."
            tag_json_string = get_image_tags(file_name, tagging_prompt, model=model)
        # 3. Call generate for the main image processing based on the user prompt.
        output_image_path, main_text_response = generate(text=prompt, file_name=file_name, model=model)
        # 4. Combine the tag JSON string and the main text response.
        if tag_json_string:
            final_text_output = f"{tag_json_string},{main_text_response}"
        else:
            final_text_output = main_text_response
        # 5. Prepare the image output for the Gradio gallery.
        image_output_list = []
        if output_image_path and os.path.exists(output_image_path):
            try:
                result_img = Image.open(output_image_path)
                # Fix: Image.open is lazy — force pixel data into memory now,
                # because the backing temp file is deleted in finally below.
                result_img.load()
                # Convert to RGB for display if it's RGBA (Gradio Gallery often expects RGB)
                if result_img.mode == "RGBA":
                    result_img = result_img.convert("RGB")
                image_output_list = [result_img]  # Add the image to the gallery list
            except Exception as img_e:
                print(f"Error opening generated image {output_image_path}: {img_e}")
                # If image opening fails, don't return an image
                image_output_list = []
                # Append error to text response
                final_text_output += f"\n\n---\n\nError loading generated image: {img_e}"
        # 6. Return results to Gradio.
        return image_output_list, final_text_output
    except Exception as e:
        # Catch any exceptions during the process
        print(f"An error occurred during processing: {e}")
        # Use Gradio's error handling to display a message in the UI
        raise gr.Error(f"Processing failed: {e}", duration=5)
    finally:
        # 7. Clean up temporary files (input and generated output).
        if composite_path and os.path.exists(composite_path):
            try:
                os.remove(composite_path)
                print(f"Deleted input temporary file: {composite_path}")
            except Exception as cleanup_e:
                print(f"Error deleting input temporary file {composite_path}: {cleanup_e}")
        if output_image_path and os.path.exists(output_image_path):
            try:
                os.remove(output_image_path)
                print(f"Deleted output temporary file: {output_image_path}")
            except Exception as cleanup_e:
                print(f"Error deleting output temporary file {output_image_path}: {cleanup_e}")
# Gradio interface (unchanged from your original code, except connection)
with gr.Blocks(  # css_paths="style.css",  # Temporarily commented out this line
) as demo:
    # Header banner: Gemini logo, title, and author/credit links.
    gr.HTML(
        """
<div class="header-container">
<div>
<img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" alt="Gemini logo">
</div>
<div>
<h1>Gemini for Image Editing</h1>
<p>Powered by <a href="https://gradio.app/">Gradio</a>⚡️|
<a href="https://huggingface.co/spaces/ameerazam08/Gemini-Image-Edit?duplicate=true">Duplicate</a> this Repo |
<a href="https://aistudio.google.com/apikey">Get an API Key</a> |
Follow me on Twitter: <a href="https://x.com/Ameerazam18">Ameerazam18</a></p>
</div>
</div>
"""
    )
    # We remove the API Configuration section or change its description, since the key is no longer entered manually
    with gr.Accordion("⚠️ API Configuration ⚠️", open=False, elem_classes="config-accordion"):
        # NOTE: the markdown below is user-facing Ukrainian text (kept verbatim).
        # It says: the Gemini API key must be stored in the `geminigoogle`
        # environment variable of the Hugging Face Space, and warns that the
        # model sometimes returns text instead of an image.
        gr.Markdown("""
- **Ваш Gemini API ключ має бути збережений у змінній оточення `geminigoogle` в налаштуваннях Hugging Face Space.**
- ❗ Іноді модель повертає текст замість зображення.
""")
    with gr.Accordion("📌 Usage Instructions", open=False, elem_classes="instructions-accordion"):
        gr.Markdown("""
### 📌 Usage
- Upload an image and enter a prompt to generate outputs.
- The response will include generated tags for the original image (in JSON format) and Gemini's text output.
- If an edited image is returned, it will appear in the gallery. If not, only text will appear.
- Upload Only PNG Image
- ❌ **Do not use NSFW images!**
""")
    with gr.Row(elem_classes="main-content"):
        with gr.Column(elem_classes="input-column"):
            # Input widgets: image upload, edit prompt, tagging toggle, submit button.
            image_input = gr.Image(
                type="pil",
                label="Upload Image",
                image_mode="RGBA",
                elem_id="image-input",
                elem_classes="upload-box"
            )
            prompt_input = gr.Textbox(
                lines=2,
                placeholder="Enter prompt here (e.g., 'change text to \"HELLO\"', 'remove the background')",
                label="Prompt for Gemini",
                elem_classes="prompt-input"
            )
            with_tags = gr.Checkbox(label="Enable Tagging", value=True)
            submit_btn = gr.Button("Generate", elem_classes="generate-btn")
        with gr.Column(elem_classes="output-column"):
            # Output widgets: gallery for the generated image(s) plus combined text box.
            output_gallery = gr.Gallery(label="Generated Image Output", elem_classes="output-gallery", allow_preview=True)
            output_text = gr.Textbox(
                label="Gemini Output (Tags + Response)",
                placeholder="Original image tags (JSON) and Gemini's response will appear here.",
                elem_classes="output-text",
                lines=10  # Give more space for combined output
            )
    # Connect the button click to the updated processing function
    submit_btn.click(
        fn=process_image_and_prompt,
        inputs=[image_input, prompt_input, with_tags],
        outputs=[output_gallery, output_text],
    )
    gr.Markdown("## Try these examples", elem_classes="gr-examples-header")
    # Example (image path, prompt) pairs shown below the UI; paths are
    # relative to the Space's working directory.
    examples = [
        ["data/1.webp", 'change text to "AMEER"'],
        ["data/2.webp", "remove the spoon from hand only"],
        ["data/3.webp", 'change text to "Make it "'],
        ["data/1.jpg", "add joker style only on face"],
        ["data/1777043.jpg", "add joker style only on face"],
        ["data/2807615.jpg", "add lipstick on lip only"],
        ["data/76860.jpg", "add lipstick on lip only"],
        ["data/2807615.jpg", "make it happy looking face only"],
    ]
    gr.Examples(
        examples=examples,
        inputs=[image_input, prompt_input],
        elem_id="examples-grid"
    )
# Launch with a bounded request queue to serialize concurrent API calls.
demo.queue(max_size=50).launch()