GeditX / app.py
marahmerah's picture
Update app.py
a6a73c1 verified
import json
import os
import time
import uuid
import tempfile
from PIL import Image, ImageDraw, ImageFont
import gradio as gr
import base64
import mimetypes
from translatepy import Translator
from io import BytesIO
from google import genai
from google.genai import types
def save_binary_file(file_name, data):
    """Write ``data`` to ``file_name`` in binary mode.

    Any existing file at that path is truncated and overwritten.
    """
    with open(file_name, mode="wb") as out_file:
        out_file.write(data)
def translate_to_english(text):
    """Translate ``text`` to English, falling back to the input on failure.

    Args:
        text: Prompt to translate.

    Returns:
        The translated string, or ``text`` unchanged when it is empty,
        whitespace-only, not a string, or when translation raises.
    """
    # Nothing to translate: skip the translator entirely instead of sending
    # an empty request and relying on the error fallback below.
    if not isinstance(text, str) or not text.strip():
        return text
    try:
        translator = Translator()
        result = translator.translate(text, destination_language="en")
        return result.result
    except Exception as e:
        # Best effort by design: a failed translation must not block generation.
        print(f"Translation error: {e}")
        return text
def generate(text, file_name, api_key, model="gemini-2.0-flash-exp"):
    """Upload a file and stream a Gemini response for the given prompt.

    Args:
        text: Prompt sent together with the uploaded file.
        file_name: Path of the local file to upload.
        api_key: API key for the client. When empty or blank, the
            ``GEMINI_API_KEY`` environment variable is used instead.
        model: Model name passed to the streaming generation call.

    Returns:
        A tuple ``(image_data, text_response)``. ``image_data`` is the inline
        payload of the first image part found in the stream, or ``None`` when
        no chunk carried one. ``text_response`` is the text of the chunks seen
        before that point, each followed by a newline.
    """
    stripped_key = api_key.strip() if api_key else ""
    client = genai.Client(api_key=stripped_key or os.environ.get("GEMINI_API_KEY"))

    uploaded = client.files.upload(file=file_name)
    contents = [
        types.Content(
            role="user",
            parts=[
                types.Part.from_uri(
                    file_uri=uploaded.uri,
                    mime_type=uploaded.mime_type,
                ),
                types.Part.from_text(text=text),
            ],
        ),
    ]
    generate_content_config = types.GenerateContentConfig(
        temperature=1,
        top_p=0.95,
        top_k=40,
        max_output_tokens=8192,
        response_modalities=["image", "text"],
        response_mime_type="text/plain",
    )

    text_pieces = []
    image_data = None
    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        candidates = chunk.candidates
        if not candidates or not candidates[0].content or not candidates[0].content.parts:
            continue
        # Look at every part, not just the first: an image may follow a text
        # part inside the same chunk.
        inline = next(
            (
                part.inline_data
                for part in candidates[0].content.parts
                if part.inline_data and part.inline_data.data
            ),
            None,
        )
        if inline is not None:
            image_data = inline.data
            break
        # chunk.text can be None when the chunk has no text parts; skip it
        # rather than failing on None + str.
        chunk_text = chunk.text
        if chunk_text:
            text_pieces.append(chunk_text)

    # No remote cleanup of the uploaded file is performed here.
    text_response = "".join(f"{piece}\n" for piece in text_pieces)
    return image_data, text_response
def process_image_and_prompt(composite_pil, prompt, gemini_api_key):
    """Run one edit request: translate the prompt, call Gemini, return outputs.

    Args:
        composite_pil: PIL image supplied by the image input component.
        prompt: User prompt; translated to English before generation.
        gemini_api_key: API key forwarded to ``generate``.

    Returns:
        ``([image], "")`` when the model produced an image (re-encoded as
        PNG), otherwise ``(None, text_response)`` with the model's text.

    Raises:
        gr.Error: When no image was provided or any step fails.
    """
    # Checked outside the try block so the message is not re-wrapped below.
    if composite_pil is None:
        raise gr.Error("Error: Please upload an image first.", duration=5)

    file_name = None
    try:
        # Translate prompt to English
        translated_prompt = translate_to_english(prompt)
        print(f"Original prompt: {prompt}, Translated prompt: {translated_prompt}")

        # Write the composite image straight to a temporary PNG file.
        # delete=False because the file is uploaded by path after it is closed.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            file_name = tmp.name
            composite_pil.save(tmp, format="PNG")

        image_data, text_response = generate(
            text=translated_prompt,
            file_name=file_name,
            api_key=gemini_api_key,
            model="gemini-2.0-flash-exp",
        )
        if not image_data:
            return None, text_response

        # Decode the returned bytes and drop the alpha channel if present.
        img = Image.open(BytesIO(image_data))
        if img.mode == 'RGBA':
            img = img.convert('RGB')

        # Re-encode as PNG so the returned image is always PNG-backed.
        output_buffer = BytesIO()
        img.save(output_buffer, format="PNG")
        output_buffer.seek(0)
        result_img = Image.open(output_buffer)
        return [result_img], ""
    except Exception as e:
        raise gr.Error(f"Error: {str(e)}", duration=5) from e
    finally:
        # The temp file is only needed for the upload; remove it so repeated
        # requests do not accumulate files on disk.
        if file_name is not None:
            try:
                os.remove(file_name)
            except OSError:
                pass
# Gradio UI: two collapsible help sections, an input column (image, API key,
# prompt, button) and an output column (gallery plus fallback text).
with gr.Blocks(css_paths="style.css") as demo:
    gr.HTML(
        """
"""
    )
    # NOTE: Markdown bodies are kept at column 0 on purpose; indenting them
    # would change the rendered text.
    with gr.Accordion("⚠️ API Configuration ⚠️", open=False, elem_classes="config-accordion"):
        gr.Markdown("""
- **Issue:** ❗ Sometimes the model returns text instead of an image.
### 🔧 Steps to Address:
1. **🛠️ Duplicate the Repository**
- Create a separate copy for modifications.
2. **🔑 Use Your Own Gemini API Key**
- You **must** configure your own Gemini key for generation!
""")
    with gr.Accordion("📌 Usage Instructions", open=False, elem_classes="instructions-accordion"):
        gr.Markdown("""
### 📌 Usage
- Upload an image (any format will be converted to PNG)
- Enter a prompt (will be automatically translated to English)
- Output will always be in PNG format
- If text is returned instead of an image, it will appear in the text output
- ❌ **Do not use NSFW images!**
""")
    with gr.Row(elem_classes="main-content"):
        with gr.Column(elem_classes="input-column"):
            image_input = gr.Image(
                type="pil",
                label="Upload Image (will be converted to PNG)",
                image_mode="RGBA",
                elem_id="image-input",
                elem_classes="upload-box"
            )
            # The key is a secret: mask it instead of showing it in clear text.
            gemini_api_key = gr.Textbox(
                lines=1,
                type="password",
                placeholder="Enter Gemini API Key (optional)",
                label="Gemini API Key (optional)",
                elem_classes="api-key-input"
            )
            prompt_input = gr.Textbox(
                lines=2,
                placeholder="Enter prompt here...",
                label="Prompt",
                elem_classes="prompt-input"
            )
            submit_btn = gr.Button("Generate", elem_classes="generate-btn")
        with gr.Column(elem_classes="output-column"):
            output_gallery = gr.Gallery(
                label="Generated Outputs (PNG)",
                elem_classes="output-gallery",
                format="png"  # Force Gradio to use PNG format
            )
            output_text = gr.Textbox(
                label="Gemini Output",
                placeholder="Text response will appear here if no image is generated.",
                elem_classes="output-text"
            )
    # Wire the button to the handler; outputs map to (gallery, text).
    submit_btn.click(
        fn=process_image_and_prompt,
        inputs=[image_input, prompt_input, gemini_api_key],
        outputs=[output_gallery, output_text],
    )
# Queue at most 50 pending requests, then start the app.
demo.queue(max_size=50).launch()