Spaces:

scmlewis
/

image_edit_generation

Sleeping

App Files Files Community

scmlewis committed on Oct 21, 2025

Commit

2890c7d

verified ·

1 Parent(s): 9ee7032

Update app.py

Browse files

Files changed (1) hide show

app.py +108 -286

app.py CHANGED Viewed

@@ -1,296 +1,118 @@
-import os
-import tempfile
-import time
-from PIL import Image
-import gradio as gr
-from google import genai
-from google.genai import types
-# Helpers
-def save_binary_file(file_name, data):
-    with open(file_name, "wb") as f:
-        f.write(data)
-def generate_edit(prompt, pil_image, api_key, model="gemini-2.0-flash-exp"):
-    # Initialize client
-    client = genai.Client(api_key=(api_key.strip() if api_key and api_key.strip() != "" else os.environ.get("GEMINI_API_KEY")))
-    # Save image to a temp path for upload
-    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_img:
-        image_path = tmp_img.name
-        pil_image.save(image_path)
-    # Upload and prepare content
-    files = [client.files.upload(file=image_path)]
-    contents = [
-        types.Content(
-            role="user",
-            parts=[
-                types.Part.from_uri(file_uri=files[0].uri, mime_type=files[0].mime_type),
-                types.Part.from_text(text=prompt),
-            ],
-        ),
-    ]
-    # Config with image + text modalities
-    generate_content_config = types.GenerateContentConfig(
-        temperature=1,
-        top_p=0.95,
-        top_k=40,
-        max_output_tokens=8192,
-        response_modalities=["image", "text"],
-        response_mime_type="text/plain",
-    )
-    text_response = ""
-    image_out_path = None
-    # Streamed generation to capture inline image data
-    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_out:
-        out_path = tmp_out.name
-        for chunk in client.models.generate_content_stream(
-            model=model,
-            contents=contents,
-            config=generate_content_config,
-        ):
-            if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
-                continue
-            candidate = chunk.candidates[0].content.parts[0]
-            if candidate.inline_data:
-                save_binary_file(out_path, candidate.inline_data.data)
-                image_out_path = out_path
-                break
-            else:
-                text_response += chunk.text + "\n"
-    # Cleanup
-    try:
-        del files
-    except Exception:
-        pass
-    return image_out_path, text_response
-def process_image_and_prompt(pil_image, prompt, api_key, progress_callback=None):
-    try:
-        # Indicate starting
-        if progress_callback:
-            progress_callback("Generating…")
-        image_path, text_out = generate_edit(prompt, pil_image, api_key)
-        if image_path:
-            img = Image.open(image_path)
-            if img.mode == "RGBA":
-                img = img.convert("RGB")
-            # success
-            if progress_callback:
-                progress_callback("Done ✓")
-            return img, "Image generated successfully!", None
-        else:
-            # fail to generate image
-            if progress_callback:
-                progress_callback("Failed to generate image")
-            return None, f"⚠️ {text_out.strip()}", None
-    except Exception as e:
-        if progress_callback:
-            progress_callback("Error")
-        return None, f"❌ Generation failed: {str(e)}", None
-def reset_inputs(api_key_value=None):
-    return None, "", api_key_value or "", ""
-# Styles
-css_style = """
-:root {
-  --bg: #14161c;
-  --panel: #1e1f25;
-  --text: #e8eaf6;
-  --muted: #a0aec0;
-  --accent: #6a8efd;
-}
-body, .app-container {
-  background: var(--bg);
-  color: var(--text);
-}
-.header-block {
-  width: 100%;
-  display: flex;
-  align-items: center;
-  justify-content: center;
-  padding: 18px;
-}
-.header-gradient {
-  width: 100%;
-  padding: 28px 0;
-  border-radius: 14px;
-  background: linear-gradient(90deg, #6a8efd, #44abc7);
-  box-shadow: 0 2px 12px rgb(50 50 70 / 12%);
-  text-align: center;
-}
-.header-title {
-  margin: 0;
-  font-size: 2.8rem;
-  font-weight: 900;
-  color: #fff;
-  text-shadow: 1px 3px 12px rgba(0,0,0,.25);
-}
-.header-subtitle {
-  margin-top: 6px;
-  font-size: 1.05rem;
-  color: #e8f2ff;
-}
-.gradient-button {
-  background: linear-gradient(90deg, #44abc7, #6a8efd);
-  color: white;
-  font-weight: 700;
-  border: none;
-  padding: 12px 28px;
-  border-radius: 10px;
-  cursor: pointer;
-  transition: background 0.25s ease;
-}
-.gradient-button:hover {
-  background: linear-gradient(90deg, #6a8efd, #44abc7);
-}
-.main {
-  display: flex;
-  gap: 22px;
 }
-.sidebar {
-  background: #1f2230;
-  padding: 20px;
-  border-radius: 12px;
-  min-height: 360px;
-  width: 320px;
-  box-shadow: 0 2px 10px rgb(0 0 0 / 0.25);
-}
-.sidebar h2 {
-  color: #8ab4ff;
-  font-size: 1rem;
-  margin: 6px 0 8px;
-}
-.sidebar ul {
-  margin: 0;
-  padding-left: 18px;
-  color: #dbeafe;
-  line-height: 1.8;
-}
-.sidebar a { color: #97b7ff; text-decoration: none; }
-.sidebar a:hover { text-decoration: underline; }
-.main-panel {
-  flex: 1;
-  min-width: 0;
-}
-.section-header {
-  font-size: 1.15rem;
-  font-weight: 700;
-  color: #cbd5e1;
-  margin: 8px 0;
-}
-.input-area, .output-area {
-  background: #1b1e28;
-  border-radius: 12px;
-  padding: 14px;
-  box-shadow: inset 0 0 0 rgba(0,0,0,0.0);
-}
-.input-area { margin-bottom: 12px; }
-.output-area { margin-top: 6px; text-align: center; }
-#status-text {
-  height: 1.2em;
-  line-height: 1.2em;
-  font-weight: 600;
-  text-align: left;
-  overflow: hidden; /* prevent scrollbars for single line */
-  white-space: nowrap;
-}
-#output-image {
-  display: flex;
-  justify-content: center;
-  align-items: center;
-}
-#output-image img {
-  max-width: 100%;
-  max-height: 420px;
-  width: auto;
-  height: auto;
-  object-fit: contain;
-  border-radius: 12px;
-  background: #23252b;
-}
-.input-header { font-family: inherit; margin: 6px 0 6px; font-weight: 700; }
-.small { font-size: .9rem; color: var(--muted); }
 """
-# Layout
-with gr.Blocks(css=css_style) as app:
-    # Header
-    gr.HTML("""
-    <div class='header-block'>
-      <div class='header-gradient'>
-        <h1 class='header-title'>🖼️ Image Editor <span style="font-size:1.1em;">(Powered by Gemini)</span> 🔮</h1>
-        <div class='header-subtitle'>Edit images with AI, fast and simple.</div>
-      </div>
-    </div>
-    """)
-    with gr.Row():
-        # Sidebar (instructions)
-        with gr.Column(scale=3, elem_classes="sidebar"):
-            gr.Markdown(
-                """
-                <h2>📖 How to Use</h2>
-                <ul>
-                  <li>Step-by-step prompts guide the editing process.</li>
-                  <li>Upload a PNG image, enter a prompt, then generate.</li>
-                  <li>Keep your Gemini API key secure.</li>
-                </ul>
-                <hr>
-                <h2>🔑 API Key</h2>
-                <div>Get your key here: <a href="https://aistudio.google.com/apikey" target="_blank">Get your Google API key</a></div>
-                """
-            )
-        # Main panel (steps and outputs)
-        with gr.Column(scale=9, elem_classes="main-panel"):
-            with gr.Column():
-                # Step 1: Upload Image
-                gr.Markdown("<div class='section-header'>Step 1: Upload Image</div>")
-                image_input = gr.Image(type="pil", label=None, image_mode="RGBA")
-                # Step 2: Prompt + API Key
-                gr.Markdown("<div class='section-header'>Step 2: Enter Editing Prompt</div>")
-                prompt_input = gr.Textbox(label="Edit Prompt", placeholder="Describe how to edit the image", lines=2)
-                api_key_input = gr.Textbox(label="Gemini API Key (required)", placeholder="Enter your Gemini API key here", type="password")
-                with gr.Row():
-                    submit_btn = gr.Button("Generate Edit", elem_classes="gradient-button")
-                    reset_btn = gr.Button("Reset Inputs")
-                # Step 3: Output
-                gr.Markdown("<div class='section-header'>Step 3: Image Output</div>")
-                output_image = gr.Image(label=None, show_label=False, type="pil")
-                status_text = gr.Textbox(label="Status", interactive=False, lines=1, elem_id="status-text")
-            # Callback wiring
-            def on_submit(pil_img, prompt, key, progress=None):
-                if not key or key.strip() == "":
-                    raise gr.Error("Gemini API Key is required!")
-                # progress: a function to update status text
-                def update(msg):
-                    if progress:
-                        progress(msg)
-                img, stat, _ = process_image_and_prompt(pil_img, prompt, key)
-                update("Completed" if img is not None else stat)
-                return img, stat
-            submit_btn.click(
-                fn=on_submit,
-                inputs=[image_input, prompt_input, api_key_input],
-                outputs=[output_image, status_text]
-            )
-            reset_btn.click(
-                fn=reset_inputs,
-                inputs=[api_key_input],
-                outputs=[image_input, prompt_input, api_key_input, status_text]
-            )
-app.launch()

+# Stylesheet handed to gr.Blocks(css=...). Every selector is an element id used
+# by the layout: #main-app-area (content wrapper, capped at 900px and centered),
+# #app-title, #instructions, and #generate-btn (gradient button with hover glow).
+custom_css = """
+/* Center main content and lock max width to 900px, with responsive shrink */
+#main-app-area {
+    max-width: 900px;
+    margin-left: auto;
+    margin-right: auto;
+    padding: 0 16px;
+}
+/* Responsive for mobile (<950px) */
+@media (max-width: 950px) {
+    #main-app-area {
+        max-width: 99vw;
+        padding: 0 2vw;
+    }
+}
+#app-title {
+    text-align: center;
+    font-size: 38px;
+    color: #53c9fc;
+    font-weight: bold;
+    padding-top: 12px;
+}
+#instructions {
+    text-align: center;
+    font-size: 19px;
+    margin: 14px 0 22px 0;
+}
+#generate-btn {
+    background: linear-gradient(90deg, #31b2fd 0%, #98f972 100%);
+    color: white;
+    font-size: 18px;
+    font-weight: bold;
+    border: none;
+    border-radius: 11px;
+    margin-top: 8px;
+    margin-bottom: 14px;
+    transition: 0.2s;
+}
+#generate-btn:hover {
+    filter: brightness(1.08);
+    box-shadow: 0 2px 16px #9efbc344;
 }
 """
+from transformers import BlipProcessor, BlipForConditionalGeneration
+from ultralytics import YOLO
+import torch
+import gradio as gr
+from PIL import Image
+from collections import deque
+import numpy as np
+# BLIP captioning processor and model, loaded once at import time from the
+# "Salesforce/blip-image-captioning-base" checkpoint.
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+# Object detector built from the 'yolov5s.pt' weights.
+detect_model = YOLO('yolov5s.pt')
+# Number of past results kept for the gallery.
+MEMORY_SIZE = 10
+# Bounded histories: once MEMORY_SIZE entries are stored, appending drops the oldest.
+# NOTE(review): these live at module level, so they look shared by every
+# session served by this process — confirm that is intended.
+last_images = deque([], maxlen=MEMORY_SIZE)
+last_captions = deque([], maxlen=MEMORY_SIZE)
+def preprocess_image(image):
+    """Return *image* in RGB mode, converting only when it is in another mode."""
+    return image if image.mode == "RGB" else image.convert("RGB")
+def detect_objects(image):
+    """Return the distinct class labels the detector finds in *image*.
+
+    Args:
+        image: The image to analyse; the caller in this file passes an RGB
+            PIL image.
+
+    Returns:
+        A list of unique label names, sorted alphabetically so the displayed
+        tag order is stable from run to run.
+    """
+    # Hand the image to the detector unchanged. Ultralytics interprets a bare
+    # NumPy array as BGR, so converting an RGB PIL image with np.array() first
+    # would swap the red and blue channels before inference.
+    results = detect_model(image)
+    # The last column of each box row is the class id.
+    labels = {
+        detect_model.names[int(box[-1])]
+        for r in results
+        for box in r.boxes.data.tolist()
+    }
+    return sorted(labels)
+# Gallery label recorded for each image in last_images; appended together with
+# it and bounded by the same MEMORY_SIZE, so the two stay aligned.
+_last_labels = deque([], maxlen=MEMORY_SIZE)
+def generate_caption(image):
+    """Caption *image*, detect its objects, and return the text plus the history gallery.
+
+    Args:
+        image: PIL image to analyse; it is converted to RGB before use.
+
+    Returns:
+        A ``(result_text, gallery)`` tuple. ``result_text`` holds the detected
+        objects and the caption for this image. ``gallery`` is a list of
+        ``(image, label)`` pairs for the stored history, oldest first.
+    """
+    image = preprocess_image(image)
+    inputs = processor(image, return_tensors="pt")
+    out = model.generate(**inputs, max_length=30, num_beams=5, early_stopping=True)
+    caption = processor.decode(out[0], skip_special_tokens=True)
+    detected_objs = detect_objects(image)
+    tags = ", ".join(detected_objs) if detected_objs else "None"
+    result_text = f"Detected objects: {tags}\nCaption: {caption}"
+    last_images.append(image)
+    last_captions.append(caption)
+    # Store the label with its own tags. Rebuilding every label from the
+    # current `tags` would stamp this image's objects onto all older entries.
+    _last_labels.append(result_text)
+    gallery = list(zip(last_images, _last_labels))
+    return result_text, gallery
+# UI: title, instructions, image upload, generate button, result text, history gallery.
+with gr.Blocks(css=custom_css) as iface:
+    # A real layout container carries the #main-app-area id. Separate
+    # gr.HTML('<div ...>') / gr.HTML('</div>') components cannot wrap their
+    # siblings, because each HTML component renders into its own element, so
+    # the centering/max-width rules would never reach the content.
+    with gr.Column(elem_id="main-app-area"):
+        gr.HTML('<div id="app-title">🖼️ Image Captioning with Object Detection</div>')
+        gr.HTML(
+            '<div id="instructions">'
+            '🙌 <b>Welcome!</b> Instantly analyze images using AI.<br>'
+            '1️⃣ <b>Upload</b> your image.<br>'
+            '2️⃣ Click <b>⭐ Generate Caption</b>.<br>'
+            '3️⃣ View and scroll through your history below.<br>'
+            '📜 <i>Last 10 results are stored for you.</i>'
+            '</div>'
+        )
+        image_input = gr.Image(type="pil", label="Upload Image")
+        generate_btn = gr.Button("⭐ Generate Caption", elem_id="generate-btn")
+        caption_output = gr.Textbox(label="📝 Caption and Detected Objects", lines=5, interactive=True)
+        gallery = gr.Gallery(label="Last 10 Images and Captions", scale=3)
+        def on_generate(image):
+            """Run captioning for the uploaded image, or prompt for one if none was given."""
+            if image is None:
+                return "Please upload an image.", []
+            return generate_caption(image)
+        generate_btn.click(
+            fn=on_generate,
+            inputs=image_input,
+            outputs=[caption_output, gallery]
+        )
+# Launch only when run as a script, not when imported.
+if __name__ == "__main__":
+    iface.launch()