Files changed (1) hide show
  1. app.py +115 -108
app.py CHANGED
@@ -1,117 +1,124 @@
1
  import gradio as gr
2
- import cv2
3
- import numpy as np
4
- from PIL import Image
5
  import torch
 
 
 
 
6
 
7
# -------------------------------------------------------------
# Optional learned-dewarping backend: try to load a Hugging Face
# model; fall back to the OpenCV-only pipeline when unavailable.
# -------------------------------------------------------------
try:
    from transformers import AutoModel, AutoImageProcessor

    # NOTE(review): this repo id looks like a Space name, not a model
    # repo — confirm it actually hosts weights loadable via AutoModel.
    MODEL_REPO = "richard1231/Document_dewarping_platform"
    processor = AutoImageProcessor.from_pretrained(MODEL_REPO)
    model = AutoModel.from_pretrained(MODEL_REPO)
    model.eval()  # inference mode: disables dropout / BN updates
    USE_HF_MODEL = True
except Exception as e:
    # Report *why* loading failed instead of silently discarding `e`,
    # then continue with the OpenCV-only fallback.
    print(f"⚠️ Hugging Face model not found, using OpenCV-only version. ({e})")
    USE_HF_MODEL = False
20
# -------------------------------------------------------------
# 🔹 Perspective correction (OpenCV fallback)
# -------------------------------------------------------------
def flatten_perspective(input_image: Image.Image) -> Image.Image:
    """Flatten a photographed document via a 4-point perspective warp.

    Detects the largest contour that approximates to a quadrilateral
    and warps it to a front-facing rectangle. Returns the input image
    unchanged whenever a reliable quadrilateral cannot be found.
    """
    img = np.array(input_image.convert("RGB"))
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blur, 50, 150)

    contours, _ = cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return input_image
    contour = max(contours, key=cv2.contourArea)

    # Only a clean 4-vertex approximation is usable as a document outline.
    peri = cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
    if len(approx) != 4:
        return input_image

    # Order corners TL, TR, BR, BL: TL minimizes x+y, BR maximizes x+y,
    # TR minimizes y-x, BL maximizes y-x.
    pts = np.float32(approx.reshape(4, 2))
    s = pts.sum(axis=1)
    rect = np.zeros((4, 2), dtype="float32")
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    (tl, tr, br, bl) = rect
    widthA = np.linalg.norm(br - bl)
    widthB = np.linalg.norm(tr - tl)
    heightA = np.linalg.norm(tr - br)
    heightB = np.linalg.norm(tl - bl)
    maxWidth, maxHeight = int(max(widthA, widthB)), int(max(heightA, heightB))

    # Degenerate quad guard: warpPerspective rejects a zero-sized output.
    if maxWidth < 1 or maxHeight < 1:
        return input_image

    dst = np.array([[0, 0],
                    [maxWidth - 1, 0],
                    [maxWidth - 1, maxHeight - 1],
                    [0, maxHeight - 1]], dtype="float32")

    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(img, M, (maxWidth, maxHeight))
    return Image.fromarray(warped)
63
-
64
-
65
# -------------------------------------------------------------
# 🔹 Learned de-warping (Hugging Face model)
# -------------------------------------------------------------
@torch.no_grad()
def flatten_learned(input_image: Image.Image) -> Image.Image:
    """Dewarp via the Hugging Face model, falling back to OpenCV.

    NOTE(review): the code assumes ``last_hidden_state[0]`` is a
    (C, H, W) image tensor, but for generic AutoModel backbones it is
    (seq_len, hidden). Any mismatch or backend failure now degrades
    gracefully to the perspective-based fallback instead of crashing.
    """
    if not USE_HF_MODEL:
        return flatten_perspective(input_image)

    try:
        inputs = processor(images=input_image, return_tensors="pt")
        outputs = model(**inputs)
        out_img = outputs.last_hidden_state[0]
        if out_img.ndim != 3:
            # Not an image-shaped tensor: learned path is unusable here.
            return flatten_perspective(input_image)
        # Min-max normalize to 0-1 (guard the constant-tensor case),
        # then convert to uint8 HWC for PIL.
        rng = out_img.max() - out_img.min()
        out_img = (out_img - out_img.min()) / (rng if rng > 0 else 1.0)
        out_img = (out_img * 255).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy()
        return Image.fromarray(out_img)
    except Exception:
        # Best-effort: any model failure falls back to the OpenCV path.
        return flatten_perspective(input_image)
80
-
81
-
82
# -------------------------------------------------------------
# 🔹 Gradio UI
# -------------------------------------------------------------
# Markdown description shown under the page title.
description = """
## 🧾 Auto Image Flattening (Perspective + Learned Dewarping)
Upload a **tilted or curved document/fabric photo**.
- Default: OpenCV 4-point perspective flattening
- Optional: if the **Hugging Face DewarpNet/DocRes model** is available, uses that instead
"""

# UI layout: input/output images side by side, a mode selector,
# and a button that triggers the flattening.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 Auto Image Flattening (OpenCV / Hugging Face)")
    gr.Markdown(description)

    with gr.Row():
        inp = gr.Image(type="pil", label="Upload Image")
        out = gr.Image(type="pil", label="Flattened Output")

    mode = gr.Radio(["Auto (Use HF if available)", "OpenCV Only"], value="Auto (Use HF if available)", label="Mode")

    # Dispatch: honor the user's choice, but force the OpenCV path
    # when the HF model failed to load at startup.
    def process(img, mode):
        if mode == "OpenCV Only" or not USE_HF_MODEL:
            return flatten_perspective(img)
        return flatten_learned(img)

    btn = gr.Button("Flatten Image")
    btn.click(process, inputs=[inp, mode], outputs=out)

    # NOTE(review): these example files must ship alongside app.py —
    # confirm example1.jpg / example2.jpg exist in the repo.
    gr.Examples(
        examples=["example1.jpg", "example2.jpg"],
        inputs=inp,
        examples_per_page=2,
    )

if __name__ == "__main__":
    demo.launch()
 
1
  import gradio as gr
2
+ from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
3
+ from PIL import Image, ImageDraw
 
4
  import torch
5
+ import numpy as np
6
+ from sklearn.cluster import KMeans
7
+ from transformers import AutoImageProcessor, AutoModel
8
+ import cv2
9
 
10
# -----------------------------------------------------
# 1️⃣ Load SAM + DINOv2
# -----------------------------------------------------
# All models share one device; prefer GPU when available.
device = "cuda" if torch.cuda.is_available() else "cpu"
# NOTE(review): checkpoint=None builds SAM with *random* weights — no
# pretrained checkpoint is loaded, so generated masks will be
# meaningless until a real .pth path is supplied. Confirm intended.
sam = sam_model_registry["vit_b"](checkpoint=None).to(device)
mask_generator = SamAutomaticMaskGenerator(sam)
# DINOv2 backbone is used only to embed region crops for clustering.
processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")
dinov2 = AutoModel.from_pretrained("facebook/dinov2-base").to(device)
19
# -----------------------------------------------------
# 2️⃣ Utility Functions
# -----------------------------------------------------
def get_embeddings(img):
    """DINOv2 feature embedding for region similarity."""
    batch = processor(images=img, return_tensors="pt").to(device)
    with torch.no_grad():
        hidden = dinov2(**batch).last_hidden_state
    tokens = hidden[0].cpu().numpy()
    # Mean over the token axis -> one fixed-size descriptor per region.
    return tokens.mean(axis=0)
29
+
30
def remove_background(image):
    """Simple background removal using SAM largest mask."""
    proposals = mask_generator.generate(image)
    if not proposals:
        return image
    biggest = max(proposals, key=lambda m: m['area'])
    keep = biggest['segmentation']
    # Everything outside the dominant object becomes pure white (in place).
    image[~keep] = 255
    return image
38
+
39
def get_centroid(mask):
    """Return the (x, y) centroid of a boolean mask, or (0, 0) if empty."""
    points = np.argwhere(mask)  # rows of (row, col) index pairs
    if points.size == 0:
        return (0, 0)
    cy, cx = points.mean(axis=0)
    # Truncate to ints so the result is usable as pixel coordinates.
    return int(cx), int(cy)
45
+
46
# -----------------------------------------------------
# 3️⃣ Segmentation Core
# -----------------------------------------------------
def segment_saree(image):
    """Segment a saree photo into Body / Border / Pallu layers.

    Returns (overlay, body_layer, border_layer, pallu_layer) as PIL
    images, or (None, None, None, None) when SAM finds no masks.

    NOTE(review): the cluster→name mapping is arbitrary — KMeans labels
    carry no semantics, so "Body"/"Border"/"Pallu" may be permuted.
    """
    image = np.array(image.convert("RGB"))
    image = remove_background(image)  # whiten everything but the saree
    masks = mask_generator.generate(image)
    if not masks:
        return None, None, None, None

    # One (mask, embedding, area) triple per SAM proposal.
    regions = []
    for m in masks:
        area = m['area']
        mask = m['segmentation']
        region_img = Image.fromarray(np.uint8(image) * mask[..., None])
        emb = get_embeddings(region_img)
        regions.append((mask, emb, area))

    # Cluster regions (up to 3 = body/border/pallu). KMeans requires
    # n_samples >= n_clusters, so clamp when SAM returns few regions —
    # the original hard-coded 3 crashed on 1–2 regions.
    feats = np.array([r[1] for r in regions])
    n_clusters = min(3, len(regions))
    kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(feats)
    labels = kmeans.labels_

    label_names = ["Body", "Border", "Pallu"]
    colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0)]
    seg_color = np.zeros((image.shape[0], image.shape[1], 3), dtype=np.uint8)

    # One opaque RGB layer per cluster; converted to RGBA further down.
    layers = [np.zeros_like(image, dtype=np.uint8) for _ in range(3)]
    for i, (mask, _, _) in enumerate(regions):
        seg_color[mask] = colors[labels[i]]
        layers[labels[i]][mask] = image[mask]

    # Overlay the cluster name at each region's centroid.
    seg_img = Image.fromarray(seg_color)
    draw = ImageDraw.Draw(seg_img)
    for i, (mask, _, _) in enumerate(regions):
        x, y = get_centroid(mask)
        draw.text((x, y), label_names[labels[i]], fill=(255, 255, 255))

    # Build transparent RGBA layers. The layers are RGB, so convert with
    # RGB2RGBA — the previous BGR2RGBA swapped red/blue in the output.
    layer_imgs = []
    for layer in layers:
        rgba = cv2.cvtColor(layer, cv2.COLOR_RGB2RGBA)
        # Fully transparent wherever the layer holds no pixels.
        rgba[..., 3] = np.where(np.all(layer == 0, axis=-1), 0, 255)
        layer_imgs.append(Image.fromarray(rgba))

    return seg_img, layer_imgs[0], layer_imgs[1], layer_imgs[2]
95
+
96
# -----------------------------------------------------
# 4️⃣ Gradio Interface
# -----------------------------------------------------
# Markdown description rendered under the app title.
description = """
### 🧶 Saree AI SAM + DINOv2 Smart Segmentation
Upload a **flat or draped saree image**.
The app will:
- ✂️ Remove background
- 🎨 Segment into **Body**, **Border**, **Pallu**
- 🪞 Give you individual transparent PNGs

Ideal for recoloring, catalog creation, or draping models.
"""

# Single-function UI: one input image, four output images
# (labelled overlay + one transparent PNG layer per cluster).
demo = gr.Interface(
    fn=segment_saree,
    inputs=gr.Image(type="pil", label="Upload Saree Image"),
    outputs=[
        gr.Image(type="pil", label="Overlay Mask with Labels"),
        gr.Image(type="pil", label="Body (Transparent)"),
        gr.Image(type="pil", label="Border (Transparent)"),
        gr.Image(type="pil", label="Pallu (Transparent)"),
    ],
    title="🧵 Saree AI — Intelligent Segmentation & Layer Extraction",
    description=description,
)

if __name__ == "__main__":
    demo.launch()