GeditX

Running

App Files Files Community

GeditX / app.py

marahmerah

Update app.py

a6a73c1 verified 10 months ago

raw

history blame contribute delete

6.19 kB

	import json
	import os
	import time
	import uuid
	import tempfile
	from PIL import Image, ImageDraw, ImageFont
	import gradio as gr
	import base64
	import mimetypes
	from translatepy import Translator
	from io import BytesIO

	from google import genai
	from google.genai import types

	def save_binary_file(file_name, data):
	    """Write ``data`` to ``file_name`` in binary mode, replacing any existing content."""
	    with open(file_name, "wb") as out_file:
	        out_file.write(data)

	def translate_to_english(text):
	    """Translate ``text`` to English, falling back to the input on failure.

	    Args:
	        text: Prompt to translate.

	    Returns:
	        The ``result`` attribute of the object returned by
	        ``Translator().translate(text, destination_language="en")``. ``text``
	        is returned unchanged when it is not a string, is empty or blank,
	        or when translation raises.
	    """
	    # Nothing to translate: skip building a translator for blank input.
	    if not isinstance(text, str) or not text.strip():
	        return text

	    try:
	        translation = Translator().translate(text, destination_language="en")
	        return translation.result
	    except Exception as e:
	        # Deliberately best-effort: a failed translation must not block the
	        # caller, so log the problem and pass the original prompt through.
	        print(f"Translation error: {e}")
	        return text

	def generate(text, file_name, api_key, model="gemini-2.0-flash-exp"):
	    """Upload a file and stream a Gemini response for it plus a text prompt.

	    Args:
	        text: Prompt sent alongside the uploaded file.
	        file_name: Path of the local file passed to ``client.files.upload``.
	        api_key: Gemini API key. When it is ``None``, empty or blank, the
	            ``GEMINI_API_KEY`` environment variable is used instead.
	        model: Model name passed to ``generate_content_stream``.

	    Returns:
	        A ``(image_data, text_response)`` tuple. ``image_data`` is the
	        ``data`` of the first part carrying inline data, or ``None`` when no
	        such part arrives. ``text_response`` holds the text of every chunk
	        received before that point, each followed by a newline.
	    """
	    resolved_key = (api_key or "").strip() or os.environ.get("GEMINI_API_KEY")
	    client = genai.Client(api_key=resolved_key)

	    # NOTE: the uploaded remote file is not deleted by this function.
	    uploaded = client.files.upload(file=file_name)

	    contents = [
	        types.Content(
	            role="user",
	            parts=[
	                types.Part.from_uri(
	                    file_uri=uploaded.uri,
	                    mime_type=uploaded.mime_type,
	                ),
	                types.Part.from_text(text=text),
	            ],
	        ),
	    ]
	    generate_content_config = types.GenerateContentConfig(
	        temperature=1,
	        top_p=0.95,
	        top_k=40,
	        max_output_tokens=8192,
	        response_modalities=["image", "text"],
	        response_mime_type="text/plain",
	    )

	    text_pieces = []
	    image_data = None
	    for chunk in client.models.generate_content_stream(
	        model=model,
	        contents=contents,
	        config=generate_content_config,
	    ):
	        if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
	            continue

	        parts = chunk.candidates[0].content.parts
	        # Inspect every part, not only the first: inline data may follow a
	        # text part within the same chunk.
	        inline = next((part.inline_data for part in parts if part.inline_data), None)
	        if inline is not None:
	            image_data = inline.data
	            break

	        # chunk.text may be None (e.g. a chunk without text parts); skipping it
	        # avoids a TypeError from concatenating None with a string.
	        chunk_text = chunk.text
	        if chunk_text is not None:
	            text_pieces.append(chunk_text)

	    text_response = "".join(f"{piece}\n" for piece in text_pieces)
	    return image_data, text_response

	def process_image_and_prompt(composite_pil, prompt, gemini_api_key):
	    """Run one edit request: translate the prompt, call Gemini, return outputs.

	    Args:
	        composite_pil: PIL image to edit; it is written to a temporary PNG
	            file that is handed to ``generate``.
	        prompt: User prompt; passed through ``translate_to_english`` first.
	        gemini_api_key: API key forwarded to ``generate``.

	    Returns:
	        ``([image], "")`` with the result re-encoded as PNG when image data
	        comes back, otherwise ``(None, text_response)``.

	    Raises:
	        gr.Error: When no image is supplied or any step fails.
	    """
	    if composite_pil is None:
	        # Fail with a clear message instead of an AttributeError on None.
	        raise gr.Error("Error: Please upload an image first.", duration=5)

	    file_name = None
	    try:
	        # Translate prompt to English
	        translated_prompt = translate_to_english(prompt)
	        print(f"Original prompt: {prompt}, Translated prompt: {translated_prompt}")

	        # Write the composite image straight into a temporary PNG file.
	        # delete=False keeps the file on disk after the handle closes so
	        # generate() can open it by path; it is removed in ``finally``.
	        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
	            file_name = tmp.name
	            composite_pil.save(tmp, format='PNG')

	        image_data, text_response = generate(
	            text=translated_prompt,
	            file_name=file_name,
	            api_key=gemini_api_key,
	            model="gemini-2.0-flash-exp"
	        )

	        if not image_data:
	            return None, text_response

	        # Convert the binary image data to PNG format
	        img = Image.open(BytesIO(image_data))
	        if img.mode == 'RGBA':
	            img = img.convert('RGB')

	        # Re-encode as PNG and reopen from the buffer.
	        output_buffer = BytesIO()
	        img.save(output_buffer, format="PNG")
	        output_buffer.seek(0)
	        result_img = Image.open(output_buffer)
	        return [result_img], ""
	    except Exception as e:
	        # Chain the cause so the original traceback stays visible in logs.
	        raise gr.Error(f"Error: {str(e)}", duration=5) from e
	    finally:
	        # Best-effort cleanup: without this, every request leaks one temp file.
	        if file_name is not None:
	            try:
	                os.remove(file_name)
	            except OSError:
	                pass

	# Gradio UI: two collapsible help panels, an input column (image, API key,
	# prompt, button) and an output column (gallery plus fallback text).
	with gr.Blocks(css_paths="style.css") as demo:
	    gr.HTML(
	        """

	"""
	    )

	    with gr.Accordion("⚠️ API Configuration ⚠️", open=False, elem_classes="config-accordion"):
	        gr.Markdown("""
	- Issue: ❗ Sometimes the model returns text instead of an image.
	### 🔧 Steps to Address:
	1. 🛠️ Duplicate the Repository
	- Create a separate copy for modifications.
	2. 🔑 Use Your Own Gemini API Key
	- You must configure your own Gemini key for generation!
	""")

	    with gr.Accordion("📌 Usage Instructions", open=False, elem_classes="instructions-accordion"):
	        gr.Markdown("""
	### 📌 Usage
	- Upload an image (any format will be converted to PNG)
	- Enter a prompt (will be automatically translated to English)
	- Output will always be in PNG format
	- If text is returned instead of an image, it will appear in the text output
	- ❌ Do not use NSFW images!
	""")

	    with gr.Row(elem_classes="main-content"):
	        with gr.Column(elem_classes="input-column"):
	            image_input = gr.Image(
	                type="pil",
	                label="Upload Image (will be converted to PNG)",
	                image_mode="RGBA",
	                elem_id="image-input",
	                elem_classes="upload-box"
	            )
	            gemini_api_key = gr.Textbox(
	                lines=1,
	                # Mask the secret so the key is not shown in clear text.
	                type="password",
	                placeholder="Enter Gemini API Key (optional)",
	                label="Gemini API Key (optional)",
	                elem_classes="api-key-input"
	            )
	            prompt_input = gr.Textbox(
	                lines=2,
	                placeholder="Enter prompt here...",
	                label="Prompt",
	                elem_classes="prompt-input"
	            )
	            submit_btn = gr.Button("Generate", elem_classes="generate-btn")

	        with gr.Column(elem_classes="output-column"):
	            output_gallery = gr.Gallery(
	                label="Generated Outputs (PNG)",
	                elem_classes="output-gallery",
	                format="png"  # Force Gradio to use PNG format
	            )
	            output_text = gr.Textbox(
	                label="Gemini Output",
	                placeholder="Text response will appear here if no image is generated.",
	                elem_classes="output-text"
	            )

	    # Wire the button to the handler; its two return values map onto the
	    # gallery and the text box, in that order.
	    submit_btn.click(
	        fn=process_image_and_prompt,
	        inputs=[image_input, prompt_input, gemini_api_key],
	        outputs=[output_gallery, output_text],
	    )

	demo.queue(max_size=50).launch()