aadarsh99 committed on
Commit
17751a0
·
1 Parent(s): 9f6e3d0
Files changed (1) hide show
  1. app.py +45 -16
app.py CHANGED
@@ -105,10 +105,14 @@ def ensure_models_loaded():
105
  # ----------------- GPU Inference -----------------
106
 
107
  @spaces.GPU(duration=120)
108
- def run_prediction(image_pil, text_prompt, threshold=0.5):
109
- if image_pil is None or not text_prompt:
110
  return None, None, None
111
 
 
 
 
 
112
  ensure_models_loaded()
113
  sam_model = MODEL_CACHE["sam"]
114
  plm_model = MODEL_CACHE["plm"]
@@ -140,7 +144,7 @@ def run_prediction(image_pil, text_prompt, threshold=0.5):
140
  with tempfile.NamedTemporaryFile(suffix=".jpg") as tmp:
141
  image_pil.save(tmp.name)
142
  # Qwen/PLM processes the text prompt here
143
- sp, dp = plm_model([text_prompt], image_emb.shape[2], image_emb.shape[3], [tmp.name])
144
 
145
  # SAM2 Mask Decoder
146
  dec = sam_model.sam_mask_decoder
@@ -169,7 +173,8 @@ def run_prediction(image_pil, text_prompt, threshold=0.5):
169
  heatmap_rgb = cv2.cvtColor(heatmap_cv, cv2.COLOR_BGR2RGB)
170
 
171
  mask = (prob > threshold).astype(np.uint8) * 255
172
- overlay = make_overlay(rgb_orig, mask, key=text_prompt)
 
173
 
174
  return overlay, Image.fromarray(heatmap_rgb), prob
175
 
@@ -182,13 +187,15 @@ def run_prediction(image_pil, text_prompt, threshold=0.5):
182
  plm_model.to("cpu")
183
  torch.cuda.empty_cache()
184
 
185
def update_threshold_ui(image_pil, text_prompt, threshold, cached_prob):
    """Re-render the mask overlay on the CPU only (no GPU quota usage).

    Reuses the probability map cached by the last GPU prediction, so moving
    the threshold slider never triggers a new model run.

    Args:
        image_pil: PIL input image, or None if nothing is loaded.
        text_prompt: prompt string; forwarded to make_overlay as the color key.
        threshold: probability cutoff for the binary mask.
        cached_prob: cached per-pixel probability array from the last run.

    Returns:
        The overlay image, or None when there is no image / cached result yet.
    """
    # Guard clauses: nothing to redraw without both an image and a cached map.
    if image_pil is None:
        return None
    if cached_prob is None:
        return None

    frame = np.array(image_pil.convert("RGB"))
    binary_mask = np.asarray(cached_prob > threshold, dtype=np.uint8) * 255
    return make_overlay(frame, binary_mask, key=text_prompt)
 
 
192
 
193
  # ----------------- UI Styling & Layout -----------------
194
 
@@ -202,6 +209,15 @@ h1 {
202
  font-size: 1.1em;
203
  margin-bottom: 20px;
204
  }
 
 
 
 
 
 
 
 
 
205
  """
206
 
207
  theme = gr.themes.Soft(
@@ -227,13 +243,25 @@ with gr.Blocks(theme=theme, css=custom_css, title="ConvSeg-Net Demo") as demo:
227
  with gr.Column(scale=1):
228
  input_image = gr.Image(type="pil", label="Input Image", height=400)
229
 
 
 
230
  with gr.Group():
231
- text_prompt = gr.Textbox(
232
- label="Conversational Prompt",
233
- placeholder="e.g., Segment the object that is prone to rolling...",
234
- lines=2
235
- )
236
- gr.Markdown("💡 **Tip:** The model works best when prompts start with **'Segment the...'**")
 
 
 
 
 
 
 
 
 
 
237
 
238
  with gr.Accordion("⚙️ Advanced Options", open=False):
239
  threshold_slider = gr.Slider(
@@ -250,15 +278,16 @@ with gr.Blocks(theme=theme, css=custom_css, title="ConvSeg-Net Demo") as demo:
250
  out_heatmap = gr.Image(label="Confidence Heatmap", type="pil")
251
 
252
  # --- Examples Section ---
 
253
  gr.Markdown("### 📝 Try Examples")
254
  gr.Examples(
255
  examples=[
256
- ["./examples/elephants.png", "Segment the elephant acting as the vanguard of the herd."],
257
- ["./examples/luggage.png", "Segment luggage resting precariously."],
258
- ["./examples/veggies.png", "Segment the produce harvested from underground."],
259
  ],
260
  inputs=[input_image, text_prompt],
261
- # cache_examples=True # Uncomment if you want to pre-compute these on startup
262
  )
263
 
264
  # --- Event Handling ---
 
105
  # ----------------- GPU Inference -----------------
106
 
107
  @spaces.GPU(duration=120)
108
+ def run_prediction(image_pil, user_text, threshold=0.5):
109
+ if image_pil is None or not user_text:
110
  return None, None, None
111
 
112
+ # --- Prepend the required prefix ---
113
+ full_prompt = f"Segment the {user_text.strip()}"
114
+ logging.info(f"Processing prompt: {full_prompt}")
115
+
116
  ensure_models_loaded()
117
  sam_model = MODEL_CACHE["sam"]
118
  plm_model = MODEL_CACHE["plm"]
 
144
  with tempfile.NamedTemporaryFile(suffix=".jpg") as tmp:
145
  image_pil.save(tmp.name)
146
  # Qwen/PLM processes the text prompt here
147
+ sp, dp = plm_model([full_prompt], image_emb.shape[2], image_emb.shape[3], [tmp.name])
148
 
149
  # SAM2 Mask Decoder
150
  dec = sam_model.sam_mask_decoder
 
173
  heatmap_rgb = cv2.cvtColor(heatmap_cv, cv2.COLOR_BGR2RGB)
174
 
175
  mask = (prob > threshold).astype(np.uint8) * 255
176
+ # Use full_prompt for key to ensure consistent colors
177
+ overlay = make_overlay(rgb_orig, mask, key=full_prompt)
178
 
179
  return overlay, Image.fromarray(heatmap_rgb), prob
180
 
 
187
  plm_model.to("cpu")
188
  torch.cuda.empty_cache()
189
 
190
def update_threshold_ui(image_pil, user_text, threshold, cached_prob):
    """Re-render the mask overlay on the CPU only (no GPU quota usage).

    Reuses the probability map cached by the last GPU prediction, so moving
    the threshold slider never triggers a new model run.

    Args:
        image_pil: PIL input image, or None if nothing is loaded.
        user_text: user-typed suffix; "Segment the " is prepended so the
            overlay color hash matches the one used during prediction.
        threshold: probability cutoff for the binary mask.
        cached_prob: cached per-pixel probability array from the last run.

    Returns:
        The overlay image, or None when there is no image / cached result yet.
    """
    # Guard clauses: nothing to redraw without both an image and a cached map.
    if image_pil is None:
        return None
    if cached_prob is None:
        return None

    frame = np.array(image_pil.convert("RGB"))
    binary_mask = np.asarray(cached_prob > threshold, dtype=np.uint8) * 255

    # Rebuild the exact prompt used at prediction time so make_overlay hashes
    # to the same color; fall back to a fixed key when the textbox is empty.
    if user_text:
        color_key = f"Segment the {user_text.strip()}"
    else:
        color_key = "mask"
    return make_overlay(frame, binary_mask, key=color_key)
199
 
200
  # ----------------- UI Styling & Layout -----------------
201
 
 
209
  font-size: 1.1em;
210
  margin-bottom: 20px;
211
  }
212
+ .prefix-container {
213
+ display: flex;
214
+ align-items: center;
215
+ justify-content: center;
216
+ height: 100%;
217
+ font-size: 1.1em;
218
+ font-weight: 600;
219
+ color: #444;
220
+ }
221
  """
222
 
223
  theme = gr.themes.Soft(
 
243
  with gr.Column(scale=1):
244
  input_image = gr.Image(type="pil", label="Input Image", height=400)
245
 
246
+ # Custom prompt input layout
247
+ gr.Markdown("**Conversational Prompt**")
248
  with gr.Group():
249
+ with gr.Row(equal_height=True):
250
+ # Fixed Prefix
251
+ gr.HTML(
252
+ "<div class='prefix-container'>Segment the</div>",
253
+ elem_classes="prefix-box",
254
+ min_width=110,
255
+ max_width=110
256
+ )
257
+ # User Input
258
+ text_prompt = gr.Textbox(
259
+ show_label=False,
260
+ container=False,
261
+ placeholder="object that is prone to rolling...",
262
+ lines=1,
263
+ scale=5
264
+ )
265
 
266
  with gr.Accordion("⚙️ Advanced Options", open=False):
267
  threshold_slider = gr.Slider(
 
278
  out_heatmap = gr.Image(label="Confidence Heatmap", type="pil")
279
 
280
  # --- Examples Section ---
281
+ # Note: removed "Segment the " from examples as it is automatically prepended now
282
  gr.Markdown("### 📝 Try Examples")
283
  gr.Examples(
284
  examples=[
285
+ ["./examples/elephants.png", "elephant acting as the vanguard of the herd."],
286
+ ["./examples/luggage.png", "luggage resting precariously."],
287
+ ["./examples/veggies.png", "produce harvested from underground."],
288
  ],
289
  inputs=[input_image, text_prompt],
290
+ # cache_examples=True
291
  )
292
 
293
  # --- Event Handling ---