nishanth-saka committed on
Commit
2ac33ac
·
verified ·
1 Parent(s): 42acb28

Feathering Overlays

Browse files
Files changed (1) hide show
  1. app.py +116 -44
app.py CHANGED
@@ -53,10 +53,15 @@ def depth_to_normal(depth):
53
  # CORE PROCESSING FUNCTION
54
  # ===============================
55
  def _process_saree_core(base_image: Image.Image, pattern_image: Image.Image):
 
 
 
56
  img_pil = base_image.convert("RGB")
57
  img_np = np.array(img_pil)
58
 
59
- # Prepare tensor
 
 
60
  img_resized = img_pil.resize((384, 384))
61
  img_tensor = torch.from_numpy(np.array(img_resized)).permute(2, 0, 1).unsqueeze(0).float() / 255.0
62
  mean = torch.as_tensor([0.5, 0.5, 0.5], device=img_tensor.device).view(1, 3, 1, 1)
@@ -67,84 +72,151 @@ def _process_saree_core(base_image: Image.Image, pattern_image: Image.Image):
67
  model = SimpleDPT(backbone_name='vit_base_patch16_384').to(device)
68
  model.eval()
69
 
70
- # Depth inference
71
  with torch.no_grad():
72
  target_size = img_pil.size[::-1]
73
  depth_map = model(img_tensor.to(device), target_size=target_size)
74
  depth_map = depth_map.squeeze().cpu().numpy()
75
 
76
- # Normalize depth
77
- depth_vis = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
78
-
79
- # Normal map
80
  normal_map = depth_to_normal(depth_vis)
81
 
82
- # Shading map (CLAHE)
 
 
83
  img_lab = cv2.cvtColor(img_np, cv2.COLOR_RGB2LAB)
84
  l_channel, _, _ = cv2.split(img_lab)
85
  clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
86
  l_clahe = clahe.apply(l_channel)
87
  shading_map = l_clahe / 255.0
88
-
89
- # Tile pattern
90
- pattern_np = np.array(pattern_image.convert("RGB"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  target_h, target_w = img_np.shape[:2]
92
- pattern_h, pattern_w = pattern_np.shape[:2]
93
- pattern_tiled = np.zeros((target_h, target_w, 3), dtype=np.uint8)
94
- for y in range(0, target_h, pattern_h):
95
- for x in range(0, target_w, pattern_w):
96
- end_y = min(y + pattern_h, target_h)
97
- end_x = min(x + pattern_w, target_w)
98
- pattern_tiled[y:end_y, x:end_x] = pattern_np[0:(end_y - y), 0:(end_x - x)]
99
-
100
- # Blend pattern
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  normal_map_loaded = normal_map.astype(np.float32)
102
- shading_map_loaded = np.stack([shading_map] * 3, axis=-1)
 
103
 
104
- alpha = 0.7
105
- blended_shading = alpha * shading_map_loaded + (1 - alpha)
 
 
 
106
 
107
- pattern_folded = pattern_tiled.astype(np.float32) / 255.0 * blended_shading
108
  normal_boost = 0.5 + 0.5 * normal_map_loaded[..., 2:3]
109
  pattern_folded *= normal_boost
110
- pattern_folded = np.clip(pattern_folded, 0, 1)
111
 
112
- # ==========================================================
113
- # Background removal with post-processing (no duplicate blur)
114
- # ==========================================================
115
  buf = BytesIO()
116
  base_image.save(buf, format="PNG")
117
  base_bytes = buf.getvalue()
118
 
119
- # Get RGBA from bgrem
120
  result_no_bg = bgrem_remove(base_bytes)
121
  mask_img = Image.open(BytesIO(result_no_bg)).convert("RGBA")
122
 
123
- # Extract alpha and clean edges
124
  mask_alpha = np.array(mask_img)[:, :, 3].astype(np.float32) / 255.0
125
 
126
- # 1. Slightly stronger shrink (balanced)
127
  k = 5
128
- kernel = np.ones((k, k), np.uint8) # slightly larger kernel
129
- mask_binary = (mask_alpha > k/100).astype(np.uint8) * 255 # slightly stricter threshold
130
- mask_eroded = cv2.erode(mask_binary, kernel, iterations=3) # balanced erosion
131
-
132
-
133
- # 2. Feather edges (blur)
134
  mask_blurred = cv2.GaussianBlur(mask_eroded, (15, 15), sigmaX=3, sigmaY=3)
 
135
 
136
- # 3. Normalize
137
- mask_blurred = mask_blurred.astype(np.float32) / 255.0
 
 
 
138
 
139
- # Final RGBA
140
- mask_stack = np.stack([mask_blurred] * 3, axis=-1)
141
- pattern_final = pattern_folded * mask_stack
142
- pattern_rgb = (pattern_final * 255).astype(np.uint8)
143
- alpha_channel = (mask_blurred * 255).astype(np.uint8)
144
- pattern_rgba = np.dstack((pattern_rgb, alpha_channel))
145
 
 
146
  return Image.fromarray(pattern_rgba, mode="RGBA")
147
 
 
148
  # ===============================
149
  # WRAPPER: ACCEPT BYTES OR BASE64
150
  # ===============================
 
53
  # CORE PROCESSING FUNCTION
54
  # ===============================
55
  def _process_saree_core(base_image: Image.Image, pattern_image: Image.Image):
56
+ # ===============================
57
+ # 0) Prep: base to RGB/np
58
+ # ===============================
59
  img_pil = base_image.convert("RGB")
60
  img_np = np.array(img_pil)
61
 
62
+ # ===============================
63
+ # 1) Depth inference (kept as-is)
64
+ # ===============================
65
  img_resized = img_pil.resize((384, 384))
66
  img_tensor = torch.from_numpy(np.array(img_resized)).permute(2, 0, 1).unsqueeze(0).float() / 255.0
67
  mean = torch.as_tensor([0.5, 0.5, 0.5], device=img_tensor.device).view(1, 3, 1, 1)
 
72
  model = SimpleDPT(backbone_name='vit_base_patch16_384').to(device)
73
  model.eval()
74
 
 
75
  with torch.no_grad():
76
  target_size = img_pil.size[::-1]
77
  depth_map = model(img_tensor.to(device), target_size=target_size)
78
  depth_map = depth_map.squeeze().cpu().numpy()
79
 
80
+ # Normalize depth and build normal map
81
+ depth_vis = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min() + 1e-8)
 
 
82
  normal_map = depth_to_normal(depth_vis)
83
 
84
+ # ===============================
85
+ # 2) Shading map (CLAHE)
86
+ # ===============================
87
  img_lab = cv2.cvtColor(img_np, cv2.COLOR_RGB2LAB)
88
  l_channel, _, _ = cv2.split(img_lab)
89
  clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
90
  l_clahe = clahe.apply(l_channel)
91
  shading_map = l_clahe / 255.0
92
+ shading_map_loaded = np.stack([shading_map] * 3, axis=-1) # (H,W,3)
93
+
94
+ # ===============================
95
+ # 3) OVERLAY alpha feather (NEW)
96
+ # ===============================
97
+ # pattern_np = np.array(pattern_image.convert("RGB")) # <-- ORIGINAL (kills alpha) [COMMENTED]
98
+ pattern_rgba_full = np.array(pattern_image.convert("RGBA")) # keep alpha
99
+ alpha = pattern_rgba_full[:, :, 3].astype(np.float32) / 255.0
100
+
101
+ # feather alpha a little to soften edges
102
+ alpha_feathered = cv2.GaussianBlur(alpha, (5, 5), sigmaX=2, sigmaY=2)
103
+ alpha_feathered = np.clip(alpha_feathered, 0.0, 1.0)
104
+
105
+ # premultiply RGB by feathered alpha
106
+ rgb = pattern_rgba_full[:, :, :3].astype(np.float32) / 255.0
107
+ rgb_pm = rgb * alpha_feathered[..., None] # premultiplied RGB in [0,1]
108
+
109
+ # Optional: crop to non-transparent bbox to avoid tiling empty margins
110
+ alpha_thresh = 0.01
111
+ ys, xs = np.where(alpha_feathered > alpha_thresh)
112
+ if ys.size > 0:
113
+ y0, y1 = ys.min(), ys.max() + 1
114
+ x0, x1 = xs.min(), xs.max() + 1
115
+ rgb_pm = rgb_pm[y0:y1, x0:x1, :]
116
+ alpha_crop = alpha_feathered[y0:y1, x0:x1]
117
+ else:
118
+ alpha_crop = alpha_feathered # degenerate case
119
+
120
+ ph, pw = alpha_crop.shape[:2]
121
+
122
+ # ===============================
123
+ # 4) Alpha-aware tiling (NEW)
124
+ # ===============================
125
  target_h, target_w = img_np.shape[:2]
126
+
127
+ # --- ORIGINAL hard RGB tiling (caused seams) [COMMENTED] ---
128
+ # pattern_h, pattern_w = pattern_np.shape[:2]
129
+ # pattern_tiled = np.zeros((target_h, target_w, 3), dtype=np.uint8)
130
+ # for y in range(0, target_h, pattern_h):
131
+ # for x in range(0, target_w, pattern_w):
132
+ # end_y = min(y + pattern_h, target_h)
133
+ # end_x = min(x + pattern_w, target_w)
134
+ # pattern_tiled[y:end_y, x:end_x] = pattern_np[0:(end_y - y), 0:(end_x - x)]
135
+
136
+ # NEW: premultiplied "over" compositing per tile
137
+ canvas_rgb_pm = np.zeros((target_h, target_w, 3), dtype=np.float32)
138
+ canvas_a = np.zeros((target_h, target_w, 1), dtype=np.float32)
139
+
140
+ tile_rgb_pm_src = rgb_pm.astype(np.float32) # (ph,pw,3), premultiplied
141
+ tile_a_src = alpha_crop.astype(np.float32)[..., None] # (ph,pw,1)
142
+
143
+ for y in range(0, target_h, ph):
144
+ for x in range(0, target_w, pw):
145
+ end_y = min(y + ph, target_h)
146
+ end_x = min(x + pw, target_w)
147
+ h = end_y - y
148
+ w = end_x - x
149
+
150
+ src_rgb_pm = tile_rgb_pm_src[:h, :w, :]
151
+ src_a = tile_a_src[:h, :w, :]
152
+
153
+ dst_rgb_pm = canvas_rgb_pm[y:end_y, x:end_x, :]
154
+ dst_a = canvas_a[y:end_y, x:end_x, :]
155
+
156
+ out_rgb_pm = src_rgb_pm + dst_rgb_pm * (1.0 - src_a)
157
+ out_a = src_a + dst_a * (1.0 - src_a)
158
+
159
+ canvas_rgb_pm[y:end_y, x:end_x, :] = out_rgb_pm
160
+ canvas_a[y:end_y, x:end_x, :] = out_a
161
+
162
+ # Un-premultiply to get display RGB; keep tiled overlay alpha
163
+ canvas_a_safe = np.clip(canvas_a, 1e-6, 1.0)
164
+ pattern_rgb_tiled = np.clip(canvas_rgb_pm / canvas_a_safe, 0.0, 1.0) # (H,W,3)
165
+ pattern_alpha_tiled = np.clip(canvas_a[..., 0], 0.0, 1.0) # (H,W)
166
+
167
+ # ===============================
168
+ # 5) Apply shading + normal boost (kept as-is)
169
+ # ===============================
170
  normal_map_loaded = normal_map.astype(np.float32)
171
+ alpha_shading = 0.7
172
+ blended_shading = alpha_shading * shading_map_loaded + (1 - alpha_shading)
173
 
174
+ # --- ORIGINAL (kept) ---
175
+ # pattern_folded = pattern_tiled.astype(np.float32) / 255.0 * blended_shading
176
+ # normal_boost = 0.5 + 0.5 * normal_map_loaded[..., 2:3]
177
+ # pattern_folded *= normal_boost
178
+ # pattern_folded = np.clip(pattern_folded, 0, 1)
179
 
180
+ pattern_folded = pattern_rgb_tiled * blended_shading
181
  normal_boost = 0.5 + 0.5 * normal_map_loaded[..., 2:3]
182
  pattern_folded *= normal_boost
183
+ pattern_folded = np.clip(pattern_folded, 0.0, 1.0)
184
 
185
+ # ===============================
186
+ # 6) Background removal for the BASE (kept, with your tuning)
187
+ # ===============================
188
  buf = BytesIO()
189
  base_image.save(buf, format="PNG")
190
  base_bytes = buf.getvalue()
191
 
 
192
  result_no_bg = bgrem_remove(base_bytes)
193
  mask_img = Image.open(BytesIO(result_no_bg)).convert("RGBA")
194
 
 
195
  mask_alpha = np.array(mask_img)[:, :, 3].astype(np.float32) / 255.0
196
 
197
+ # Slightly stronger shrink + feather (your settings)
198
  k = 5
199
+ kernel = np.ones((k, k), np.uint8)
200
+ mask_binary = (mask_alpha > k / 100.0).astype(np.uint8) * 255
201
+ mask_eroded = cv2.erode(mask_binary, kernel, iterations=3)
 
 
 
202
  mask_blurred = cv2.GaussianBlur(mask_eroded, (15, 15), sigmaX=3, sigmaY=3)
203
+ mask_blurred = mask_blurred.astype(np.float32) / 255.0 # [0,1]
204
 
205
+ # ===============================
206
+ # 7) Combine BASE mask with OVERLAY tiled alpha (NEW)
207
+ # ===============================
208
+ overlay_alpha_stack = pattern_alpha_tiled # (H,W) in [0,1]
209
+ alpha_combined = np.clip(mask_blurred * overlay_alpha_stack, 0.0, 1.0)
210
 
211
+ # Apply combined alpha to folded pattern
212
+ pattern_final_rgb = pattern_folded * alpha_combined[..., None]
213
+ pattern_rgb_u8 = (np.clip(pattern_final_rgb, 0.0, 1.0) * 255).astype(np.uint8)
214
+ alpha_u8 = (alpha_combined * 255).astype(np.uint8)
 
 
215
 
216
+ pattern_rgba = np.dstack((pattern_rgb_u8, alpha_u8))
217
  return Image.fromarray(pattern_rgba, mode="RGBA")
218
 
219
+
220
  # ===============================
221
  # WRAPPER: ACCEPT BYTES OR BASE64
222
  # ===============================