Spaces:

scmlewis
/

image_edit_generation

Sleeping

App Files Files Community

scmlewis committed on Oct 21, 2025

Commit

4cf5765

verified ·

1 Parent(s): 160b28a

Update app.py

Browse files

Files changed (1) hide show

app.py +177 -68

app.py CHANGED Viewed

@@ -12,10 +12,15 @@ def save_binary_file(file_name, data):
         f.write(data)
 def generate_edit(prompt, pil_image, api_key, model="gemini-2.0-flash-exp"):
     client = genai.Client(api_key=(api_key.strip() if api_key and api_key.strip() != "" else os.environ.get("GEMINI_API_KEY")))
     with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_img:
         image_path = tmp_img.name
         pil_image.save(image_path)
     files = [client.files.upload(file=image_path)]
     contents = [
         types.Content(
@@ -26,6 +31,8 @@ def generate_edit(prompt, pil_image, api_key, model="gemini-2.0-flash-exp"):
             ],
         ),
     ]
     generate_content_config = types.GenerateContentConfig(
         temperature=1,
         top_p=0.95,
@@ -34,8 +41,11 @@ def generate_edit(prompt, pil_image, api_key, model="gemini-2.0-flash-exp"):
         response_modalities=["image", "text"],
         response_mime_type="text/plain",
     )
     text_response = ""
-    image_path_result = None
     with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_out:
         out_path = tmp_out.name
         for chunk in client.models.generate_content_stream(
@@ -48,128 +58,227 @@ def generate_edit(prompt, pil_image, api_key, model="gemini-2.0-flash-exp"):
             candidate = chunk.candidates[0].content.parts[0]
             if candidate.inline_data:
                 save_binary_file(out_path, candidate.inline_data.data)
-                image_path_result = out_path
                 break
             else:
                 text_response += chunk.text + "\n"
-    del files
-    return image_path_result, text_response
-def process_image_and_prompt(pil_image, prompt, api_key, progress=None):
     try:
-        if progress:
-            progress("Generating…")
         image_path, text_out = generate_edit(prompt, pil_image, api_key)
         if image_path:
             img = Image.open(image_path)
             if img.mode == "RGBA":
                 img = img.convert("RGB")
-            if progress:
-                progress("Completed ✓")
             return img, "Image generated successfully!", None
         else:
-            if progress:
-                progress(f"Warning: {text_out.strip() or 'No image generated.'}")
-            return None, f"⚠️ {text_out.strip()?text_out.strip():'No image generated.'}", None
     except Exception as e:
-        if progress:
-            progress("Error")
         return None, f"❌ Generation failed: {str(e)}", None
-def reset_inputs(api_key_value=""):
-    return None, "", api_key_value, ""
 # Styles
 css_style = """
-:root{--bg:#0e111a;--panel:#151a24;--text:#e9eefc;--muted:#9fb3c8;--accent:#6a8efd}
-html,body, #app{height:100%}
-body{background:#0e111a;color:var(--text);font-family:Inter, system-ui, Arial}
-.header-block{width:100%; display:flex; justify-content:center; padding:14px 0 6px}
-.header-gradient{width:100%; padding:26px 0; border-radius:12px; background:linear-gradient(90deg,#6a8efd,#44abc7); box-shadow:0 2px 12px rgb(0 0 0 / 15%)}
-.header-title{margin:0; font-size:2.8rem; font-weight:900; color:#fff; text-shadow:0 2px 8px rgba(0,0,0,.25)}
-.header-subtitle{color:#e7f0ff; font-size:1.05rem; margin-top:6px}
-.gradient-button{background:linear-gradient(90deg,#44abc7,#6a8efd); border:none; color:white; font-weight:700; padding:12px 28px; border-radius:10px; cursor:pointer; margin-right:8px}
-.gradient-button:hover{background:linear-gradient(90deg,#6a8efd,#44abc7)}
-.main{display:flex; gap:22px; align-items:flex-start; padding:0 14px}
-.sidebar{width:320px; background:#141923; padding:18px; border-radius:12px; box-shadow:0 2px 10px rgb(0 0 0 / 0.25)}
-.sidebar h2{color:#8ab4ff; font-size:1rem; margin:8px 0}
-.sidebar ul{margin:0 0 12px 18px; padding:0; color:#dbeafe; line-height:2}
-.main-panel{flex:1; min-width:0}
-.section-header{font-size:1.15rem; font-weight:800; color:#cbd5e1; margin:6px 0 8px}
-.row{display:flex; gap:16px; align-items:flex-start}
-.input-area, .output-area{ background:#1b2030; border-radius:12px; padding:12px; margin-bottom:8px}
-#status-text{ height:1.6em; line-height:1.6em; resize:none; overflow:hidden; text-align:left; padding:6px 8px; border-radius:6px; border:1px solid #2d2f40; background:#0f1320; color:#cbd5e1; font-weight:600}
-#output-image{ display:flex; justify-content:center; align-items:center; min-height:240px; padding:6px 0}
-#output-image img{ max-width:100%; max-height:420px; object-fit:contain; border-radius:12px; background:#23252b}
-@media (max-width: 1100px){
-  .sidebar{ display:none } /* optional: collapse for narrow screens to preserve space */
-  .main{ gap:12px }
 }
 """
-# UI
 with gr.Blocks(css=css_style) as app:
     gr.HTML(
         """
         <div class='header-block'>
           <div class='header-gradient'>
-            <h1 class='header-title'>🖼️ Image Editor <span style="font-size:1.2em"> (Powered by Gemini)</span> 🔮</h1>
             <div class='header-subtitle'>Step-by-step prompts with a persistent status banner and progress feedback</div>
           </div>
         </div>
         """
     )
-    with gr.Row(class_name="main"):
-        # Sidebar with instructions and API key link
         with gr.Column(scale=3, elem_classes="sidebar"):
             gr.Markdown(
                 """
                 <h2>📖 How to Use</h2>
                 <ul>
-                  <li>Step 1: Upload Image</li>
-                  <li>Step 2: Enter Editing Prompt</li>
-                  <li>Step 3: View Output</li>
                 </ul>
-                <hr style='border-color:#2c3244' />
                 <h2>🔑 API Key</h2>
                 <div>Get your key here: <a href="https://aistudio.google.com/apikey" target="_blank">Get your Google API key</a></div>
                 """
             )
-        # Main workflow
         with gr.Column(scale=9, elem_classes="main-panel"):
-            with gr.Row():
                 # Step 1: Upload Image
-                with gr.Column():
-                    gr.Markdown("<div class='section-header'>Step 1: Upload Image</div>")
-                    image_input = gr.Image(type="pil", label=None, image_mode="RGBA")
-                    # Step 2: Prompt + API key
-                    gr.Markdown("<div class='section-header'>Step 2: Enter Editing Prompt</div>")
-                    prompt_input = gr.Textbox(label="Edit Prompt", placeholder="Describe how to edit the image", lines=2)
-                    api_key_input = gr.Textbox(label="Gemini API Key (required)", placeholder="Enter your Gemini API key here", type="password")
-                    with gr.Row():
-                        submit_btn = gr.Button("Generate Edit", elem_classes="gradient-button")
-                        reset_btn = gr.Button("Reset Inputs")
                 # Step 3: Output
-                with gr.Column():
-                    gr.Markdown("<div class='section-header'>Step 3: Image Output</div>")
-                    output_image = gr.Image(label=None, show_label=False, type="pil")
-                    status_text = gr.Textbox(label="Status", interactive=False, lines=1, id="status-text")
-            # Callbacks
             def on_submit(pil_img, prompt, key, progress=None):
                 if not key or key.strip() == "":
                     raise gr.Error("Gemini API Key is required!")
                 img, stat, _ = process_image_and_prompt(pil_img, prompt, key, progress)
                 return img, stat
-            def progress_fn(msg):
-                # local progress hook: simply returns a placeholder by updating status_text listener
-                return
             submit_btn.click(
                 fn=on_submit,
                 inputs=[image_input, prompt_input, api_key_input],

         f.write(data)
 def generate_edit(prompt, pil_image, api_key, model="gemini-2.0-flash-exp"):
+    # Initialize client
     client = genai.Client(api_key=(api_key.strip() if api_key and api_key.strip() != "" else os.environ.get("GEMINI_API_KEY")))
+    # Save image to a temp path for upload
     with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_img:
         image_path = tmp_img.name
         pil_image.save(image_path)
+    # Upload and prepare content
     files = [client.files.upload(file=image_path)]
     contents = [
         types.Content(
             ],
         ),
     ]
+    # Config with image + text modalities
     generate_content_config = types.GenerateContentConfig(
         temperature=1,
         top_p=0.95,
         response_modalities=["image", "text"],
         response_mime_type="text/plain",
     )
     text_response = ""
+    image_out_path = None
+    # Streamed generation to capture inline image data
     with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_out:
         out_path = tmp_out.name
         for chunk in client.models.generate_content_stream(
             candidate = chunk.candidates[0].content.parts[0]
             if candidate.inline_data:
                 save_binary_file(out_path, candidate.inline_data.data)
+                image_out_path = out_path
                 break
             else:
                 text_response += chunk.text + "\n"
+    # Cleanup
+    try:
+        del files
+    except Exception:
+        pass
+    return image_out_path, text_response
+def process_image_and_prompt(pil_image, prompt, api_key, progress_callback=None):
     try:
+        # Indicate starting
+        if progress_callback:
+            progress_callback("Generating…")
         image_path, text_out = generate_edit(prompt, pil_image, api_key)
         if image_path:
             img = Image.open(image_path)
             if img.mode == "RGBA":
                 img = img.convert("RGB")
+            # success
+            if progress_callback:
+                progress_callback("Done ✓")
             return img, "Image generated successfully!", None
         else:
+            # fail to generate image
+            if progress_callback:
+                progress_callback("Failed to generate image")
+            return None, f"⚠️ {text_out.strip()}", None
     except Exception as e:
+        if progress_callback:
+            progress_callback("Error")
         return None, f"❌ Generation failed: {str(e)}", None
+def reset_inputs(api_key_value=None):
+    return None, "", api_key_value or "", ""
 # Styles
 css_style = """
+:root {
+  --bg: #14161c;
+  --panel: #1e1f25;
+  --text: #e8eaf6;
+  --muted: #a0aec0;
+  --accent: #6a8efd;
+}
+body, .app-container {
+  background: var(--bg);
+  color: var(--text);
+}
+.header-block {
+  width: 100%;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  padding: 18px;
+}
+.header-gradient {
+  width: 100%;
+  padding: 28px 0;
+  border-radius: 14px;
+  background: linear-gradient(90deg, #6a8efd, #44abc7);
+  box-shadow: 0 2px 12px rgb(50 50 70 / 12%);
+  text-align: center;
+}
+.header-title {
+  margin: 0;
+  font-size: 2.8rem;
+  font-weight: 900;
+  color: #fff;
+  text-shadow: 1px 3px 12px rgba(0,0,0,.25);
+}
+.header-subtitle {
+  margin-top: 6px;
+  font-size: 1.05rem;
+  color: #e8f2ff;
+}
+.gradient-button {
+  background: linear-gradient(90deg, #44abc7, #6a8efd);
+  color: white;
+  font-weight: 700;
+  border: none;
+  padding: 12px 28px;
+  border-radius: 10px;
+  cursor: pointer;
+  transition: background 0.25s ease;
+}
+.gradient-button:hover {
+  background: linear-gradient(90deg, #6a8efd, #44abc7);
+}
+.main {
+  display: flex;
+  gap: 22px;
+}
+.sidebar {
+  background: #1f2230;
+  padding: 20px;
+  border-radius: 12px;
+  min-height: 360px;
+  width: 320px;
+  box-shadow: 0 2px 10px rgb(0 0 0 / 0.25);
+}
+.sidebar h2 {
+  color: #8ab4ff;
+  font-size: 1rem;
+  margin: 6px 0 8px;
+}
+.sidebar ul {
+  margin: 0;
+  padding-left: 18px;
+  color: #dbeafe;
+  line-height: 1.8;
+}
+.sidebar a { color: #97b7ff; text-decoration: none; }
+.sidebar a:hover { text-decoration: underline; }
+.main-panel {
+  flex: 1;
+  min-width: 0;
+}
+.section-header {
+  font-size: 1.15rem;
+  font-weight: 700;
+  color: #cbd5e1;
+  margin: 8px 0;
+}
+.input-area, .output-area {
+  background: #1b1e28;
+  border-radius: 12px;
+  padding: 14px;
+  box-shadow: inset 0 0 0 rgba(0,0,0,0.0);
+}
+.input-area { margin-bottom: 12px; }
+.output-area { margin-top: 6px; text-align: center; }
+#status-text {
+  height: 1.2em;
+  line-height: 1.2em;
+  font-weight: 600;
+  text-align: left;
+  overflow: hidden;
+  white-space: nowrap;
+  padding: 0;
+  border: none;
+  background: transparent;
+  color: #cbd5e1;
+}
+#output-image {
+  display: flex;
+  justify-content: center;
+  align-items: center;
+}
+#output-image img {
+  max-width: 100%;
+  max-height: 420px;
+  width: auto;
+  height: auto;
+  object-fit: contain;
+  border-radius: 12px;
+  background: #23252b;
 }
 """
+# Layout
 with gr.Blocks(css=css_style) as app:
     gr.HTML(
         """
         <div class='header-block'>
           <div class='header-gradient'>
+            <h1 class='header-title'>🖼️ Image Editor <span style="font-size:1.1em;">(Powered by Gemini)</span> 🔮</h1>
             <div class='header-subtitle'>Step-by-step prompts with a persistent status banner and progress feedback</div>
           </div>
         </div>
         """
     )
+    with gr.Row():
+        # Sidebar (instructions)
         with gr.Column(scale=3, elem_classes="sidebar"):
             gr.Markdown(
                 """
                 <h2>📖 How to Use</h2>
                 <ul>
+                  <li>Step-by-step prompts guide the editing process.</li>
+                  <li>Upload a PNG image, enter a prompt, then generate.</li>
+                  <li>Keep your Gemini API key secure.</li>
                 </ul>
+                <hr>
                 <h2>🔑 API Key</h2>
                 <div>Get your key here: <a href="https://aistudio.google.com/apikey" target="_blank">Get your Google API key</a></div>
                 """
             )
+        # Main panel (steps and outputs)
         with gr.Column(scale=9, elem_classes="main-panel"):
+            with gr.Column():
                 # Step 1: Upload Image
+                gr.Markdown("<div class='section-header'>Step 1: Upload Image</div>")
+                image_input = gr.Image(type="pil", label=None, image_mode="RGBA")
+                # Step 2: Prompt + API Key
+                gr.Markdown("<div class='section-header'>Step 2: Enter Editing Prompt</div>")
+                prompt_input = gr.Textbox(label="Edit Prompt", placeholder="Describe how to edit the image", lines=2)
+                api_key_input = gr.Textbox(label="Gemini API Key (required)", placeholder="Enter your Gemini API key here", type="password")
+                with gr.Row():
+                    submit_btn = gr.Button("Generate Edit", elem_classes="gradient-button")
+                    reset_btn = gr.Button("Reset Inputs")
                 # Step 3: Output
+                gr.Markdown("<div class='section-header'>Step 3: Image Output</div>")
+                output_image = gr.Image(label=None, show_label=False, type="pil")
+                status_text = gr.Textbox(label="Status", interactive=False, lines=1, elem_id="status-text")
+            # Callback wiring
             def on_submit(pil_img, prompt, key, progress=None):
                 if not key or key.strip() == "":
                     raise gr.Error("Gemini API Key is required!")
                 img, stat, _ = process_image_and_prompt(pil_img, prompt, key, progress)
                 return img, stat
             submit_btn.click(
                 fn=on_submit,
                 inputs=[image_input, prompt_input, api_key_input],