# Hugging Face Space: scmlewis/image_edit_generation
# (Captured from the Space file viewer; status at capture time: Sleeping.
#  File size: 7,644 bytes.)

import os
import tempfile
from PIL import Image
import gradio as gr
from google import genai
from google.genai import types

# Helpers
def save_binary_file(file_name, data):
    """Write ``data`` to ``file_name`` in binary mode, replacing any existing content."""
    handle = open(file_name, "wb")
    try:
        handle.write(data)
    finally:
        # Always release the handle, even if the write fails.
        handle.close()

def generate_edit(prompt, pil_image, api_key, model="gemini-2.0-flash-exp"):
    """Send an image and an edit prompt to a Gemini model and collect the result.

    The image is written to a temporary PNG, uploaded through the client's
    file API, and referenced together with ``prompt`` in a single user turn.
    The streamed response is read until the first chunk that carries inline
    image data; text received before that point is accumulated.

    Args:
        prompt: Text instruction describing the desired edit.
        pil_image: Object exposing ``save(path)`` (a PIL image in this app).
        api_key: API key to use. When empty or whitespace-only, the
            ``GEMINI_API_KEY`` environment variable is used instead.
        model: Model name passed to ``generate_content_stream``.

    Returns:
        A ``(image_out_path, text_response)`` tuple. ``image_out_path`` is the
        path of a temporary ``.png`` file holding the returned image bytes, or
        ``None`` when the model returned no image. The caller owns that file
        and is responsible for deleting it. ``text_response`` is the text
        received before the image (each chunk followed by a newline), or an
        empty string.
    """
    resolved_key = (api_key or "").strip() or os.environ.get("GEMINI_API_KEY")
    client = genai.Client(api_key=resolved_key)

    # Reserve a temp path, then close the handle before writing to it by name:
    # writing to a still-open NamedTemporaryFile by path fails on Windows.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_img:
        image_path = tmp_img.name
    try:
        pil_image.save(image_path)
        files = [client.files.upload(file=image_path)]
    finally:
        # The local copy is only needed for the upload; don't leak it.
        try:
            os.remove(image_path)
        except OSError:
            pass

    contents = [
        types.Content(
            role="user",
            parts=[
                types.Part.from_uri(file_uri=files[0].uri, mime_type=files[0].mime_type),
                types.Part.from_text(text=prompt),
            ],
        ),
    ]
    generate_content_config = types.GenerateContentConfig(
        temperature=1,
        top_p=0.95,
        top_k=40,
        max_output_tokens=8192,
        response_modalities=["image", "text"],
        response_mime_type="text/plain",
    )

    text_pieces = []
    image_out_path = None

    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
            continue
        parts = chunk.candidates[0].content.parts
        # Look at every part, not just the first: the image may follow a text part.
        image_part = next(
            (p for p in parts if p.inline_data and p.inline_data.data),
            None,
        )
        if image_part is not None:
            # Create the output file only once there is something to store, so
            # a text-only response leaves no stray temp file behind.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_out:
                tmp_out.write(image_part.inline_data.data)
                image_out_path = tmp_out.name
            break
        # chunk.text can be empty/None for chunks without text parts.
        if chunk.text:
            text_pieces.append(chunk.text + "\n")

    return image_out_path, "".join(text_pieces)

def process_image_and_prompt(pil_image, prompt, api_key):
    """Run an edit request and return the resulting image for display.

    Args:
        pil_image: The uploaded image, or ``None`` if nothing was uploaded.
        prompt: Text instruction describing the desired edit.
        api_key: Gemini API key forwarded to ``generate_edit``.

    Returns:
        A fully loaded PIL image. RGBA results are converted to RGB.

    Raises:
        gr.Error: If no image was supplied, if the request fails, or if the
            model returned no image (the model's text, when present, is
            included in the message).
    """
    if pil_image is None:
        raise gr.Error("Please upload an image before generating an edit.")

    try:
        image_path, text_out = generate_edit(prompt, pil_image, api_key)
    except gr.Error:
        # Already user-facing; don't wrap it a second time.
        raise
    except Exception as e:
        raise gr.Error(f"❌ Generation failed: {str(e)}") from e

    if not image_path:
        # Raised outside the try block so the message is not re-wrapped as a
        # generic "Generation failed" error.
        raise gr.Error(f"⚠️ Image generation failed: {text_out.strip() if text_out.strip() else 'No specific error message.'}")

    try:
        with Image.open(image_path) as img:
            # convert()/copy() both force the pixel data to be read, so the
            # returned image stays usable after the file is closed and removed.
            result = img.convert("RGB") if img.mode == "RGBA" else img.copy()
    except Exception as e:
        raise gr.Error(f"❌ Generation failed: {str(e)}") from e
    finally:
        # The temp file was only a hand-off from generate_edit; clean it up.
        try:
            os.remove(image_path)
        except OSError:
            pass
    return result

def reset_inputs(api_key_value=None):
    """Return cleared values for the image and prompt while preserving the API key.

    The result is a 3-tuple ``(image, prompt, api_key)``: the image is ``None``,
    the prompt is an empty string, and the key is passed through unchanged
    (or replaced by an empty string when it is falsy).
    """
    preserved_key = api_key_value if api_key_value else ""
    cleared_image = None
    cleared_prompt = ""
    return (cleared_image, cleared_prompt, preserved_key)

# Custom stylesheet handed to gr.Blocks(css=...) below.
# - body/.app-container get a gradient background instead of a solid colour.
# - .header-* classes style the banner emitted by the gr.HTML header.
# - .sidebar / .main-panel are attached to the two layout columns via
#   elem_classes; .section-header styles the "Step N" headings.
# NOTE(review): the submit button uses elem_classes="gradient-button", but no
# .gradient-button rule is defined in this string. Several selectors here
# (.main, .layout-two-col, #output-viewport, #output-image) are likewise not
# referenced by the layout visible in this file; confirm whether they are
# still needed.
css_style = """
:root { --bg: #0f111a; --panel: #1b1e28; --text: #e9eefc; --muted: #9fb3c8; --accent: #6a8efd; }
body, .app-container {
  /* Gradient background instead of solid */
  background: linear-gradient(135deg, #2a3a67, #1b254b);
  color: var(--text);
}
.header-block { width: 100%; display: flex; justify-content: center; padding: 8px 0; }
.header-gradient { width: 100%; padding: 20px 0; border-radius: 12px; background: linear-gradient(90deg, #6a8efd, #44abc7); text-align: center; }
.header-title { margin: 0; font-size: 2.6rem; font-weight: 900; color: #fff; text-shadow: 0 2px 8px rgba(0,0,0,.25); }
.header-subtitle { margin-top: 6px; font-size: 1.05rem; color: #e8f0ff; }

.main { display: flex; gap: 20px; align-items: stretch; padding: 0 12px; }
.sidebar { width: 320px; background: #1a1e2a; padding: 14px; border-radius: 12px; min-height: 360px; box-shadow: 0 2px 10px rgb(0 0 0 / 0.25); }
.sidebar h2 { color: #8ab4ff; font-size: 1rem; margin: 6px 0; }
.sidebar ul { margin: 0; padding-left: 18px; color: #d6e3ff; line-height: 1.9; }
.sidebar a { color: #97b7ff; text-decoration: none; }
.sidebar a:hover { text-decoration: underline; }

.main-panel { flex: 1; display: flex; flex-direction: column; gap: 12px; }

.section-header { font-weight: 800; font-size: 1.04rem; color: #cbd5e1; margin: 6px 0; }

.layout-two-col {
  display: grid;
  grid-template-columns: 1fr 1fr;
  gap: 14px;
}
@media (max-width: 1100px) {
  .layout-two-col { grid-template-columns: 1fr; }
}

#output-viewport { display: flex; justify-content: center; align-items: center; min-height: 260px; }
#output-image { display: flex; justify-content: center; align-items: center; }
#output-image img { max-width: 100%; max-height: 420px; object-fit: contain; border-radius: 12px; background: #23252b; }
"""

# Layout
# Builds the Gradio UI: a banner, a help sidebar, and a main panel holding the
# image input/output, the prompt and API-key fields, and the two action buttons.
# Components are created in declaration order inside the nested context
# managers, so the order of statements below defines the rendered layout.
with gr.Blocks(css=css_style) as app:
    # Page banner (styled by the .header-* rules in css_style).
    gr.HTML("""
    <div class='header-block'>
      <div class='header-gradient'>
        <h1 class='header-title'>🖼️ Image Editor <span style="font-size:1.1em;">(Powered by Gemini)</span> 🔮</h1>
        <div class='header-subtitle'>Step-by-step prompts for image editing</div>
      </div>
    </div>
    """)

    with gr.Row():
        # Left column: static usage instructions and a link for obtaining an API key.
        with gr.Column(scale=3, elem_classes="sidebar"):
            gr.Markdown(
                """
                <h2>📖 How to Use</h2>
                <ul>
                  <li>Step-by-step prompts guide the editing process.</li>
                  <li>Upload a PNG image, enter a prompt, then generate.</li>
                  <li>Keep your Gemini API key secure.</li>
                </ul>
                <hr>
                <h2>🔑 API Key</h2>
                <div>Get your key here: <a href="https://aistudio.google.com/apikey" target="_blank">Get your Google API key</a></div>
                """
            )
        # Right column: the interactive controls.
        with gr.Column(scale=9, elem_classes="main-panel"):
            # Step 1 (upload) and Step 3 (result) are shown side by side.
            with gr.Row():
                with gr.Column():
                    gr.Markdown("<div class='section-header'>Step 1: Upload Image</div>")
                    # The upload is delivered to the handler as an RGBA PIL image.
                    image_input = gr.Image(type="pil", label=None, image_mode="RGBA")
                with gr.Column():
                    gr.Markdown("<div class='section-header'>Step 3: Image Output</div>")
                    output_image = gr.Image(label=None, show_label=False, type="pil")
            # Step 2: prompt text and API key.
            gr.Markdown("<div class='section-header'>Step 2: Enter Editing Prompt</div>")
            prompt_input = gr.Textbox(label="Edit Prompt", placeholder="Describe how to edit the image", lines=2)
            api_key_input = gr.Textbox(label="Gemini API Key (required)", placeholder="Enter your Gemini API key here", type="password")

            with gr.Row():
                submit_btn = gr.Button("Generate Edit", elem_classes="gradient-button")
                reset_btn = gr.Button("Reset Inputs")
            
            # There is no status component: failures are reported by raising
            # gr.Error from the handlers below.

            def on_submit(pil_img, prompt, key):
                """Validate that an API key was entered, then run the edit.

                Raises gr.Error when the key is missing or blank; otherwise
                returns whatever process_image_and_prompt returns.
                """
                if not key or key.strip() == "":
                    raise gr.Error("Gemini API Key is required!")
                # process_image_and_prompt raises gr.Error itself on failure.
                return process_image_and_prompt(pil_img, prompt, key)

            # "Generate Edit": image + prompt + key in, edited image out.
            submit_btn.click(
                fn=on_submit,
                inputs=[image_input, prompt_input, api_key_input],
                outputs=[output_image] # Only the image component is updated
            )

            # "Reset Inputs": clears the uploaded image and the prompt; the API
            # key is read and written back unchanged. The output image is not
            # among the outputs, so it is left as is.
            reset_btn.click(
                fn=reset_inputs,
                inputs=[api_key_input],
                outputs=[image_input, prompt_input, api_key_input] # One output per value returned by reset_inputs
            )

# Start the Gradio server; this runs whenever the module is executed or imported.
app.launch()