Inam65 committed on
Commit
d90f6b4
·
verified ·
1 Parent(s): 1c45d68

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -46
app.py CHANGED
@@ -1,50 +1,66 @@
1
  import gradio as gr
2
  import numpy as np
3
  import torch
 
4
  from PIL import Image
5
  from transformers import SamModel, SamProcessor
6
 
7
- # 1. Load the Model and Processor
8
  device = "cuda" if torch.cuda.is_available() else "cpu"
9
  model = SamModel.from_pretrained("facebook/sam-vit-base").to(device)
10
  processor = SamProcessor.from_pretrained("facebook/sam-vit-base")
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def segment_object(image_data):
13
- # image_data is a dictionary from the Gradio ImageEditor
14
- if image_data is None or "composite" not in image_data:
15
  return None
16
 
 
17
  raw_image = image_data["background"].convert("RGB")
18
 
19
- # Get the bounding box from the editor's layers
20
- # Gradio returns shapes in a list. We look for the rectangle.
21
  layers = image_data.get("layers", [])
22
  if not layers:
23
- return raw_image # Return original if no box drawn
24
-
25
- # For simplicity, we take the first box found
26
- # In a real app, you'd iterate to find the 'crop' or 'rect' layer
27
- # Here we use the composite mask logic for a beginner-friendly approach
28
-
29
- # Convert image for model
30
- inputs = processor(raw_image, return_tensors="pt").to(device)
31
- image_embeddings = model.get_image_embeddings(inputs["pixel_values"])
32
 
33
- # In this simple version, we'll use the 'mask' drawn by the user
34
- # to find the object. If you use the 'brush' or 'rect' tool:
35
- mask = image_data["layers"][0].split()[-1] # Alpha channel of the drawing layer
36
- mask = np.array(mask)
37
 
38
- # Find the coordinates of the drawn rectangle
39
- coords = np.argwhere(mask > 0)
40
  if coords.size == 0:
41
- return raw_image
42
-
 
43
  y0, x0 = coords.min(axis=0)
44
  y1, x1 = coords.max(axis=0)
45
  input_boxes = [[[x0, y0, x1, y1]]]
46
 
47
- # 2. Predict the mask
 
 
 
48
  inputs = processor(raw_image, input_boxes=[input_boxes], return_tensors="pt").to(device)
49
  inputs.pop("pixel_values", None)
50
  inputs["image_embeddings"] = image_embeddings
@@ -52,42 +68,57 @@ def segment_object(image_data):
52
  with torch.no_grad():
53
  outputs = model(**inputs)
54
 
55
- # 3. Process the results
56
  masks = processor.image_processor.post_process_masks(
57
  outputs.pred_masks.cpu(),
58
  inputs.original_sizes.cpu(),
59
  inputs.reshaped_input_sizes.cpu()
60
  )
61
-
62
- # Take the first mask (best guess)
63
  best_mask = masks[0][0][0].numpy()
64
 
65
- # 4. Create High-Quality White Background
 
 
 
 
66
  raw_np = np.array(raw_image)
67
- # Create an image where the background is white [255, 255, 255]
68
  white_bg = np.ones_like(raw_np) * 255
69
 
70
- # Place object on white background
71
- # We use the mask to choose between original pixels and white pixels
72
- final_img = np.where(best_mask[..., None], raw_np, white_bg)
73
 
74
- return Image.fromarray(final_img.astype('uint8'))
75
 
76
- # 3. Create the Gradio Interface
77
- with gr.Blocks() as demo:
78
- gr.Markdown("# 🖌️ Object Extractor to White Background")
79
- gr.Markdown("1. Upload an image. 2. Use the **Box** or **Brush** tool to highlight the object. 3. Click Submit.")
80
 
81
  with gr.Row():
82
- input_img = gr.ImageEditor(
83
- label="Input Image",
84
- type="pil",
85
- layers=True,
86
- canvas_size=(512, 512)
87
- )
88
- output_img = gr.Image(label="Extracted Object", type="pil")
89
-
90
- submit_btn = gr.Button("Extract Object")
91
- submit_btn.click(segment_object, inputs=[input_img], outputs=[output_img])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
  demo.launch()
 
1
  import gradio as gr
2
  import numpy as np
3
  import torch
4
+ import cv2
5
  from PIL import Image
6
  from transformers import SamModel, SamProcessor
7
 
8
# 1. Load the Model and Processor (using the base model for speed)
# NOTE: both checkpoints are downloaded at import time, so the first
# launch of the Space can be slow.
device = "cuda" if torch.cuda.is_available() else "cpu"
# SAM (Segment Anything) base checkpoint, moved to GPU when one is available.
model = SamModel.from_pretrained("facebook/sam-vit-base").to(device)
# Matching processor: handles image resizing/normalization and box prompts.
processor = SamProcessor.from_pretrained("facebook/sam-vit-base")
12
 
13
def refine_mask(mask):
    """
    Clean up a binary segmentation mask.

    Keeps only the largest connected foreground component (dropping the
    small speckle "spots" SAM sometimes produces) and lightly smooths the
    edges before re-thresholding.

    Args:
        mask: 2-D boolean (or 0/1 numeric) array, e.g. the output of
            SAM's post-processing step.

    Returns:
        2-D boolean array with the same shape as ``mask``.
    """
    # Local import: scipy is only needed here. Using scipy.ndimage instead
    # of OpenCV removes the heavyweight cv2 dependency whose only use in
    # this app was these two small operations.
    from scipy import ndimage

    binary = mask.astype(bool)

    # Label connected blobs with 8-connectivity (a full 3x3 structuring
    # element), matching cv2.connectedComponentsWithStats(connectivity=8).
    structure = np.ones((3, 3), dtype=bool)
    labels, num_features = ndimage.label(binary, structure=structure)

    if num_features > 1:
        # Several blobs: keep only the one with the largest pixel area.
        areas = ndimage.sum(binary, labels, index=range(1, num_features + 1))
        largest = 1 + int(np.argmax(areas))
        refined = (labels == largest).astype(float)
    else:
        # Zero or one component: nothing to prune. (Both branches now
        # yield a float array, unlike the earlier uint8/float mix.)
        refined = binary.astype(float)

    # Slight Gaussian smoothing, then re-threshold to a clean boolean mask.
    # sigma=0.8 is what OpenCV derives for a (3, 3) kernel with sigma=0:
    # 0.3 * ((ksize - 1) * 0.5 - 1) + 0.8.
    refined = ndimage.gaussian_filter(refined, sigma=0.8)
    return refined > 0.5
34
+
35
def segment_object(image_data):
    """
    Extract the user-selected object and place it on a white background.

    Args:
        image_data: dict produced by gr.ImageEditor, with keys
            "background" (PIL image) and "layers" (list of RGBA PIL
            drawing layers). May be None when nothing was uploaded.

    Returns:
        A PIL.Image with the segmented object on white, the original
        image when no selection was drawn, or None when no image was
        provided.
    """
    if image_data is None or "background" not in image_data:
        return None

    # Load the background image
    raw_image = image_data["background"].convert("RGB")

    # Extract the user's drawing from the layers: the drawing lives in the
    # alpha channel of the first layer.
    # NOTE(review): assumes the layer has the same pixel size as the
    # background image — confirm against the ImageEditor output format.
    layers = image_data.get("layers", [])
    if not layers:
        return raw_image

    mask_layer = np.array(layers[0].split()[-1])  # alpha channel
    coords = np.argwhere(mask_layer > 0)
    if coords.size == 0:
        return raw_image  # Return original if no selection made

    # Bounding box of the drawn region. np.argwhere yields (row, col),
    # i.e. (y, x), hence the unpack order below.
    y0, x0 = coords.min(axis=0)
    y1, x1 = coords.max(axis=0)
    # Plain Python floats in the (batch, boxes, 4) nesting SamProcessor
    # expects: [[[x0, y0, x1, y1]]].
    input_boxes = [[[float(x0), float(y0), float(x1), float(y1)]]]

    # --- AI PREDICTION ---
    inputs = processor(raw_image, return_tensors="pt").to(device)
    image_embeddings = model.get_image_embeddings(inputs["pixel_values"])

    # BUGFIX: pass the already-nested box list directly. The previous
    # call wrapped it once more (input_boxes=[input_boxes]), producing a
    # 4-level nesting that does not match the SamProcessor API.
    inputs = processor(raw_image, input_boxes=input_boxes, return_tensors="pt").to(device)
    inputs.pop("pixel_values", None)  # reuse the precomputed embeddings
    inputs["image_embeddings"] = image_embeddings

    with torch.no_grad():
        outputs = model(**inputs)

    # Convert the model output back into a full-resolution binary mask.
    masks = processor.image_processor.post_process_masks(
        outputs.pred_masks.cpu(),
        inputs.original_sizes.cpu(),
        inputs.reshaped_input_sizes.cpu(),
    )
    best_mask = masks[0][0][0].numpy()

    # --- REFINEMENT STEP ---
    # Removes the stray "spots" a raw SAM mask can contain.
    final_mask = refine_mask(best_mask)

    # --- CREATE FINAL IMAGE ---
    raw_np = np.array(raw_image)
    # Pure white background, same shape as the original image.
    white_bg = np.ones_like(raw_np) * 255

    # Blend: if the mask is set take the original pixel, otherwise white.
    output_np = np.where(final_mask[..., None], raw_np, white_bg)

    return Image.fromarray(output_np.astype('uint8'))
92
 
93
# 3. Build the Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🛠High-Quality Object Extractor")
    gr.Markdown("Upload an image and **draw a tight rectangle** around the object you want to keep.")

    # Left column: editor + button; right column: result image.
    with gr.Row():
        with gr.Column():
            # The ImageEditor allows users to draw rectangles
            img_input = gr.ImageEditor(
                label="Input Image (Draw a Box)",
                type="pil",
                layers=True,
                sources=["upload", "clipboard"],
                canvas_size=(712, 712)
            )
            submit_btn = gr.Button("Extract & Clean Mask", variant="primary")

        with gr.Column():
            img_output = gr.Image(label="Result (White Background)", type="pil")

    # Wire the button to the segmentation function defined above.
    submit_btn.click(
        fn=segment_object,
        inputs=[img_input],
        outputs=[img_output]
    )

    gr.Markdown("---")
    gr.Markdown("### 💡 Tips for better results:")
    gr.Markdown("- Draw your rectangle as **close to the object edges** as possible.")
    gr.Markdown("- If there are still spots, try using the **brush tool** instead of the rectangle to 'paint' exactly what you want.")

# Start the app (blocking call).
demo.launch()