Files changed (1) hide show
  1. app.py +86 -63
app.py CHANGED
@@ -1,18 +1,26 @@
1
  import gradio as gr
2
  from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
 
 
3
  from PIL import Image, ImageDraw
4
- import torch
5
- import numpy as np
6
  from sklearn.cluster import KMeans
7
- from transformers import AutoImageProcessor, AutoModel
8
- import cv2
9
 
10
  # -----------------------------------------------------
11
- # 1️⃣ Load SAM + DINOv2
12
  # -----------------------------------------------------
13
  device = "cuda" if torch.cuda.is_available() else "cpu"
14
- sam = sam_model_registry["vit_b"](checkpoint=None).to(device)
 
 
 
 
 
 
15
  mask_generator = SamAutomaticMaskGenerator(sam)
 
 
 
16
  processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")
17
  dinov2 = AutoModel.from_pretrained("facebook/dinov2-base").to(device)
18
 
@@ -20,7 +28,7 @@ dinov2 = AutoModel.from_pretrained("facebook/dinov2-base").to(device)
20
  # 2️⃣ Utility Functions
21
  # -----------------------------------------------------
22
  def get_embeddings(img):
23
- """DINOv2 feature embedding for region similarity."""
24
  inputs = processor(images=img, return_tensors="pt").to(device)
25
  with torch.no_grad():
26
  outputs = dinov2(**inputs)
@@ -28,12 +36,12 @@ def get_embeddings(img):
28
  return feat.mean(axis=0)
29
 
30
  def remove_background(image):
31
- """Simple background removal using SAM largest mask."""
32
  masks = mask_generator.generate(image)
33
  if not masks:
34
  return image
35
  main_mask = max(masks, key=lambda x: x['area'])['segmentation']
36
- image[~main_mask] = 255 # white background
37
  return image
38
 
39
  def get_centroid(mask):
@@ -43,68 +51,81 @@ def get_centroid(mask):
43
  y, x = coords.mean(axis=0)
44
  return int(x), int(y)
45
 
 
 
 
 
 
46
  # -----------------------------------------------------
47
- # 3️⃣ Segmentation Core
48
  # -----------------------------------------------------
49
  def segment_saree(image):
50
- image = np.array(image.convert("RGB"))
51
- image = remove_background(image) # background cleanup
52
- masks = mask_generator.generate(image)
53
- if not masks:
54
- return None, None, None, None
55
-
56
- regions = []
57
- for m in masks:
58
- area = m['area']
59
- mask = m['segmentation']
60
- region_img = Image.fromarray(np.uint8(image) * mask[..., None])
61
- emb = get_embeddings(region_img)
62
- regions.append((mask, emb, area))
63
-
64
- # Cluster regions (3 = body/border/pallu)
65
- feats = np.array([r[1] for r in regions])
66
- kmeans = KMeans(n_clusters=3, random_state=42).fit(feats)
67
- labels = kmeans.labels_
68
-
69
- label_names = ["Body", "Border", "Pallu"]
70
- colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0)]
71
- seg_color = np.zeros((image.shape[0], image.shape[1], 3), dtype=np.uint8)
72
-
73
- # prepare transparent layers
74
- layers = [np.zeros_like(image, dtype=np.uint8) for _ in range(3)]
75
- for i, (mask, _, _) in enumerate(regions):
76
- seg_color[mask] = colors[labels[i]]
77
- layers[labels[i]][mask] = image[mask]
78
-
79
- # overlay label text + legend
80
- seg_img = Image.fromarray(seg_color)
81
- draw = ImageDraw.Draw(seg_img)
82
- for i, (mask, _, _) in enumerate(regions):
83
- x, y = get_centroid(mask)
84
- draw.text((x, y), label_names[labels[i]], fill=(255, 255, 255))
85
-
86
- # create transparent PILs
87
- layer_imgs = [Image.fromarray(cv2.cvtColor(l, cv2.COLOR_BGR2RGBA)) for l in layers]
88
- for l in layer_imgs:
89
- alpha = np.where(np.all(np.array(l)[..., :3] == 0, axis=-1), 0, 255)
90
- arr = np.array(l)
91
- arr[..., 3] = alpha
92
- l.paste(Image.fromarray(arr))
93
-
94
- return seg_img, layer_imgs[0], layer_imgs[1], layer_imgs[2]
 
 
 
 
 
 
 
 
95
 
96
  # -----------------------------------------------------
97
- # 4️⃣ Gradio Interface
98
  # -----------------------------------------------------
99
  description = """
100
- ### 🧶 Saree AI — SAM + DINOv2 Smart Segmentation
101
- Upload a **flat or draped saree image**.
102
- The app will:
103
  - ✂️ Remove background
104
- - 🎨 Segment into **Body**, **Border**, **Pallu**
105
- - 🪞 Give you individual transparent PNGs
 
106
 
107
- Ideal for recoloring, catalog creation, or draping models.
108
  """
109
 
110
  demo = gr.Interface(
@@ -115,9 +136,11 @@ demo = gr.Interface(
115
  gr.Image(type="pil", label="Body (Transparent)"),
116
  gr.Image(type="pil", label="Border (Transparent)"),
117
  gr.Image(type="pil", label="Pallu (Transparent)"),
 
118
  ],
119
- title="🧵 Saree AI — Intelligent Segmentation & Layer Extraction",
120
  description=description,
 
121
  )
122
 
123
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
3
+ from transformers import AutoImageProcessor, AutoModel
4
+ from huggingface_hub import snapshot_download
5
  from PIL import Image, ImageDraw
6
+ import torch, numpy as np, cv2, zipfile, io, os
 
7
  from sklearn.cluster import KMeans
 
 
8
 
9
# -----------------------------------------------------
# 1️⃣ Model Initialization
# -----------------------------------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"

# --- Download SAM checkpoint if missing ---
# FIX: use urllib instead of os.system("wget ..."): wget is absent on many
# hosts (Windows, slim containers) and os.system silently ignores failures,
# which would make the sam_model_registry load below fail confusingly.
SAM_CHECKPOINT = "sam_vit_b_01ec64.pth"
if not os.path.exists(SAM_CHECKPOINT):
    from urllib.request import urlretrieve  # local import: one-time setup path
    urlretrieve(
        "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth",
        SAM_CHECKPOINT,
    )

# --- Load SAM (ViT-B variant) and its automatic mask generator ---
sam = sam_model_registry["vit_b"](checkpoint=SAM_CHECKPOINT).to(device)
mask_generator = SamAutomaticMaskGenerator(sam)

# --- Preload DINOv2 weights into the local HF cache, then load them ---
snapshot_download("facebook/dinov2-base")
processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")
dinov2 = AutoModel.from_pretrained("facebook/dinov2-base").to(device)
26
 
 
28
  # 2️⃣ Utility Functions
29
  # -----------------------------------------------------
30
  def get_embeddings(img):
31
+ """Extract DINOv2 feature embeddings."""
32
  inputs = processor(images=img, return_tensors="pt").to(device)
33
  with torch.no_grad():
34
  outputs = dinov2(**inputs)
 
36
  return feat.mean(axis=0)
37
 
38
def remove_background(image):
    """Blank out everything outside the largest SAM mask.

    The biggest auto-generated segment is assumed to be the saree itself;
    every pixel outside it is painted white IN PLACE.

    Args:
        image: writable H×W×3 uint8 numpy array (RGB).

    Returns:
        The same array, mutated; returned unchanged if SAM finds no masks.
    """
    candidates = mask_generator.generate(image)
    if not candidates:
        return image
    largest = max(candidates, key=lambda m: m['area'])
    image[~largest['segmentation']] = 255  # white out background
    return image
46
 
47
  def get_centroid(mask):
 
51
  y, x = coords.mean(axis=0)
52
  return int(x), int(y)
53
 
54
def make_transparent(img, mask):
    """Attach an alpha channel to ``img`` driven by ``mask``.

    Pixels where ``mask`` is truthy get alpha 255 (opaque), all others 0.

    FIX: the original called ``cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)`` on
    what is actually RGB data. That conversion only appends an opaque alpha
    channel (channel order is untouched), so a plain numpy stack is exactly
    equivalent, drops the misleading BGR naming, and avoids the cv2 call.

    Args:
        img:  H×W×3 uint8 numpy array.
        mask: H×W boolean (or truthy) array.

    Returns:
        H×W×4 uint8 numpy array (original channels + alpha).
    """
    alpha = np.where(mask, 255, 0).astype(np.uint8)
    return np.dstack((img, alpha))
58
+
59
  # -----------------------------------------------------
60
+ # 3️⃣ Main Segmentation Function
61
  # -----------------------------------------------------
62
def segment_saree(image):
    """Segment a saree photo into Body / Border / Pallu layers.

    Pipeline: background removal (largest SAM mask) → SAM region proposals →
    DINOv2 embedding per region → KMeans(3) clustering → colour overlay,
    three transparent PNG layers, and a ZIP of all layers.

    Args:
        image: PIL image from the Gradio input.

    Returns:
        (overlay, body_rgba, border_rgba, pallu_rgba, zip_path); on any
        failure, four dark 512×512 placeholders and ``None`` for the ZIP.
    """
    try:
        image = np.array(image.convert("RGB"))
        image = remove_background(image)
        masks = mask_generator.generate(image)
        if not masks:
            raise ValueError("No masks generated")

        # One (mask, embedding) pair per SAM region.
        regions = []
        for m in masks:
            mask = m["segmentation"]
            region_img = Image.fromarray(np.uint8(image) * mask[..., None])
            emb = get_embeddings(region_img)
            regions.append((mask, emb))

        # KMeans below needs at least n_clusters samples.
        if len(regions) < 3:
            raise ValueError("Insufficient distinct regions")

        features = np.array([r[1] for r in regions])
        labels = KMeans(n_clusters=3, random_state=42).fit(features).labels_

        colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0)]
        names = ["Body", "Border", "Pallu"]
        seg_out = np.zeros_like(image)
        layers = [np.zeros_like(image, dtype=np.uint8) for _ in range(3)]

        for (mask, _), lbl in zip(regions, labels):
            seg_out[mask] = colors[lbl]
            layers[lbl][mask] = image[mask]

        # Label each region at its centroid on the colour overlay.
        seg_img = Image.fromarray(seg_out)
        draw = ImageDraw.Draw(seg_img)
        for (mask, _), lbl in zip(regions, labels):
            x, y = get_centroid(mask)
            draw.text((x, y), names[lbl], fill=(255, 255, 255))

        # Transparent layers: alpha wherever the layer has any non-black pixel.
        transparent_imgs = [
            Image.fromarray(make_transparent(l, l.any(axis=2))) for l in layers
        ]

        # FIX: gr.File expects a file path, not an io.BytesIO — write the
        # archive to a temp file and hand Gradio its path. Fresh archive, so
        # open with "w" rather than append mode.
        import tempfile  # local import: only needed on the success path
        zip_path = os.path.join(tempfile.mkdtemp(), "saree_layers.zip")
        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
            for n, t in zip(names, transparent_imgs):
                bio = io.BytesIO()
                t.save(bio, format="PNG")
                zf.writestr(f"{n}.png", bio.getvalue())

        return (
            seg_img,
            transparent_imgs[0],
            transparent_imgs[1],
            transparent_imgs[2],
            zip_path,
        )
    except Exception as e:
        # UI boundary: never crash Gradio — show dark placeholders instead.
        print("Error:", e)
        blank = Image.new("RGB", (512, 512), color=(30, 30, 30))
        return blank, blank, blank, blank, None
116
 
117
  # -----------------------------------------------------
118
+ # 4️⃣ Gradio UI
119
  # -----------------------------------------------------
120
  description = """
121
+ ### 🧵 Saree AI — Intelligent Segmentation & Layer Export
122
+ Upload a **flat or draped saree image**, and this tool will:
 
123
  - ✂️ Remove background
124
+ - 🧠 Segment into **Body**, **Border**, **Pallu** using SAM + DINOv2
125
+ - 🪞 Provide transparent PNGs
126
+ - 📦 Download all masks as a single ZIP
127
 
128
+ Built for saree recoloring, catalog automation, and AI draping pipelines.
129
  """
130
 
131
  demo = gr.Interface(
 
136
  gr.Image(type="pil", label="Body (Transparent)"),
137
  gr.Image(type="pil", label="Border (Transparent)"),
138
  gr.Image(type="pil", label="Pallu (Transparent)"),
139
+ gr.File(label="📦 Download All (ZIP)"),
140
  ],
141
+ title="🧶 Saree AI — SAM + DINOv2 Smart Segmentation",
142
  description=description,
143
+ allow_flagging="never",
144
  )
145
 
146
  if __name__ == "__main__":