Spaces:

nishanth-saka
/

depth-pattern

Sleeping

App Files Community

nishanth-saka committed on Aug 9, 2025

Commit

1214ae6

verified ·

1 Parent(s): 33d3bc6

REVERT

Browse files

Files changed (1) hide show

app.py +107 -101

app.py CHANGED Viewed

@@ -50,10 +50,10 @@ def depth_to_normal(depth):
     return normal
 # ===============================
-# CORE PROCESSING FUNCTION (Unified-mask + overlap blend)
 # ===============================
 def _process_saree_core(base_image: Image.Image, pattern_image: Image.Image):
-    # img_pil = base_image.convert("RGB")  # <-- COMMENTED: can inject black behind transparency
     # img_np = np.array(img_pil)
     # --- ORIGINAL (white matte) kept for reference ---
@@ -67,28 +67,37 @@ def _process_saree_core(base_image: Image.Image, pattern_image: Image.Image):
     # img_np = _rgb_over_white
     # --- end ORIGINAL ---
-    # --- Alpha-aware RGB using median interior-boundary matte (prevents black halo) ---
     base_rgba = base_image.convert("RGBA")
-    _arr = np.array(base_rgba).astype(np.float32)               # (H,W,4) 0..255
-    _rgb = _arr[..., :3]
-    _alpha8 = _arr[..., 3].astype(np.uint8)
-    _a = (_alpha8.astype(np.float32) / 255.0)[..., None]        # (H,W,1)
-    _fg_mask = (_alpha8 > 128).astype(np.uint8) * 255
-    _k3 = np.ones((3, 3), np.uint8)
-    _er1 = cv2.erode(_fg_mask, _k3, iterations=1)
-    _boundary = cv2.bitwise_and(_fg_mask, cv2.bitwise_not(_er1))
     if int((_boundary > 0).sum()) < 100:
-        _d2 = cv2.dilate(_fg_mask, _k3, iterations=2)
-        _e2 = cv2.erode(_fg_mask, _k3, iterations=2)
-        _boundary = cv2.subtract(_d2, _e2)
     _idx = (_boundary > 0)
     if not np.any(_idx):
         _idx = (_fg_mask > 0)
     _median_color = np.median(_rgb[_idx], axis=0) if np.any(_idx) else np.array([255.0, 255.0, 255.0], dtype=np.float32)
-    _median_color = _median_color.reshape(1, 1, 3)
     _rgb_over_matte = _rgb * _a + (1.0 - _a) * _median_color
     _rgb_over_matte = np.clip(_rgb_over_matte, 0.0, 255.0).astype(np.uint8)
@@ -96,7 +105,7 @@ def _process_saree_core(base_image: Image.Image, pattern_image: Image.Image):
     img_np = _rgb_over_matte
     # --- end NEW ---
-    # Prepare tensor (global, once)
     img_resized = img_pil.resize((384, 384))
     img_tensor = torch.from_numpy(np.array(img_resized)).permute(2, 0, 1).unsqueeze(0).float() / 255.0
     mean = torch.as_tensor([0.5, 0.5, 0.5], device=img_tensor.device).view(1, 3, 1, 1)
@@ -107,7 +116,7 @@ def _process_saree_core(base_image: Image.Image, pattern_image: Image.Image):
     model = SimpleDPT(backbone_name='vit_base_patch16_384').to(device)
     model.eval()
-    # Depth inference (global)
     with torch.no_grad():
         target_size = img_pil.size[::-1]
         depth_map = model(img_tensor.to(device), target_size=target_size)
@@ -116,124 +125,121 @@ def _process_saree_core(base_image: Image.Image, pattern_image: Image.Image):
     # Normalize depth
     depth_vis = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
-    # Normal & shading maps (global)
     normal_map = depth_to_normal(depth_vis)
     img_lab = cv2.cvtColor(img_np, cv2.COLOR_RGB2LAB)
     l_channel, _, _ = cv2.split(img_lab)
     clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
     l_clahe = clahe.apply(l_channel)
     shading_map = l_clahe / 255.0
-    # Pattern tiling with global origin (no per-region reset)
     pattern_np = np.array(pattern_image.convert("RGB"))
     target_h, target_w = img_np.shape[:2]
-    ph, pw = pattern_np.shape[:2]
     pattern_tiled = np.zeros((target_h, target_w, 3), dtype=np.uint8)
-    for y in range(0, target_h, ph):
-        for x in range(0, target_w, pw):
-            ey = min(y + ph, target_h)
-            ex = min(x + pw, target_w)
-            pattern_tiled[y:ey, x:ex] = pattern_np[0:(ey - y), 0:(ex - x)]
-    # Global fold & light
-    normal_map_f = normal_map.astype(np.float32)
-    shading_map_f = np.stack([shading_map] * 3, axis=-1)
-    alpha_lit = 0.7
-    blended_shading = alpha_lit * shading_map_f + (1 - alpha_lit)
     pattern_folded = pattern_tiled.astype(np.float32) / 255.0 * blended_shading
-    normal_boost = 0.5 + 0.5 * normal_map_f[..., 2:3]
     pattern_folded *= normal_boost
     pattern_folded = np.clip(pattern_folded, 0, 1)
     # ==========================================================
-    # Unified mask from rembg, then overlap cross-fade band
     # ==========================================================
     buf = BytesIO()
     base_image.save(buf, format="PNG")
     base_bytes = buf.getvalue()
-    # Get RGBA from bgrem (unified alpha)
     result_no_bg = bgrem_remove(base_bytes)
     mask_img = Image.open(BytesIO(result_no_bg)).convert("RGBA")
-    mask_alpha = np.array(mask_img)[:, :, 3].astype(np.float32) / 255.0  # [0..1]
-    # --- ORIGINAL mask steps (commented for reference) ---
-    # k = 5
-    # kernel = np.ones((k, k), np.uint8)
-    # mask_binary = (mask_alpha > k/100).astype(np.uint8) * 255
-    # mask_eroded = cv2.erode(mask_binary, kernel, iterations=3)
-    # mask_blurred = cv2.GaussianBlur(mask_eroded, (15, 15), sigmaX=3, sigmaY=3)
-    # mask_blurred = mask_blurred.astype(np.float32) / 255.0
     # mask_stack = np.stack([mask_blurred] * 3, axis=-1)
     # pattern_final = pattern_folded * mask_stack
-    # --- end ORIGINAL ---
-    # --- NEW: overlap band via inner/outer shells (distance-field cross-fade) ---
-    # Tunables:
-    overlap_px    = 10   # width of cross-fade band around edges (px)
-    feather_sigma = 1.5  # light Gaussian to keep transitions smooth
-    bleed_iters   = 2    # color bleed strength along edge
-    alpha_floor   = 0.02 # minimum alpha to hide hairlines
-    # Build binary base (strict) for morphology
-    bin_strict = (mask_alpha > 0.5).astype(np.uint8) * 255
-    k5 = np.ones((5, 5), np.uint8)
-    # Inner and outer shells
-    inner = cv2.erode(bin_strict, k5, iterations=max(1, overlap_px // 5))      # shrink inside
-    outer = cv2.dilate(bin_strict, k5, iterations=max(1, overlap_px // 5))     # grow outside
-    # Distance fields (inside to inner edge; outside to outer edge)
-    d_in  = cv2.distanceTransform(inner, cv2.DIST_L2, 3)        # distance to 0 within inner mask
-    d_out = cv2.distanceTransform(255 - outer, cv2.DIST_L2, 3)  # distance to 0 outside outer mask
-    # Compose smooth alpha:
-    #   1 inside inner  → fully opaque
-    #   0 outside outer → fully transparent
-    #   linear ramp in the overlap band
-    alpha_inside = (inner > 0).astype(np.float32)
-    alpha_outside = (outer == 0).astype(np.float32)
-    # Normalize distances into 0..1 ramps
-    ramp_in  = np.clip(d_in  / max(1.0, overlap_px), 0.0, 1.0)
-    ramp_out = np.clip(d_out / max(1.0, overlap_px), 0.0, 1.0)
-    # Where neither fully inside nor fully outside, use a symmetric blend
-    alpha_band = np.clip(0.5 * (ramp_in + (1.0 - ramp_out)), 0.0, 1.0)
-    alpha_unified = np.where(alpha_inside > 0, 1.0,
-                      np.where(alpha_outside > 0, 0.0, alpha_band))
-    # Feather lightly
-    alpha_unified = cv2.GaussianBlur((alpha_unified * 255).astype(np.uint8),
-                                     (7, 7), sigmaX=feather_sigma, sigmaY=feather_sigma)
-    alpha_unified = alpha_unified.astype(np.float32) / 255.0
-    # Premultiplied blend with unified alpha
-    mask_stack = np.stack([alpha_unified] * 3, axis=-1)
-    pattern_final = pattern_folded * mask_stack  # premultiplied RGB
-    # --- Edge color bleed in premultiplied space (thin band only) ---
-    edge_band = (alpha_unified > 0.0) & (alpha_unified <= min(0.12, overlap_px / max(10.0, overlap_px)))  # ~8–12%
     if np.any(edge_band):
-        k3 = np.ones((3, 3), np.uint8)
-        premul_u8 = (pattern_final * 255).astype(np.uint8)
-        premul_bleed = cv2.dilate(premul_u8, k3, iterations=int(bleed_iters)).astype(np.float32) / 255.0
-        pattern_final[edge_band] = premul_bleed[edge_band]
-    # Premultiplied → straight alpha for PNG export
     eps = 1e-6
-    A = np.clip(alpha_unified, max(alpha_floor, eps), 1.0)
-    A3 = A[..., None]
-    rgb_straight = np.clip(pattern_final / A3, 0.0, 1.0)
     pattern_rgb = (rgb_straight * 255).astype(np.uint8)
-    alpha_channel = (A * 255).astype(np.uint8)
     pattern_rgba = np.dstack((pattern_rgb, alpha_channel))
     return Image.fromarray(pattern_rgba, mode="RGBA")
 # ===============================
 # WRAPPER: ACCEPT BYTES OR BASE64
 # ===============================

     return normal
 # ===============================
+# CORE PROCESSING FUNCTION
 # ===============================
 def _process_saree_core(base_image: Image.Image, pattern_image: Image.Image):
+    # img_pil = base_image.convert("RGB")  # <-- COMMENTED: this injects black behind transparency
     # img_np = np.array(img_pil)
     # --- ORIGINAL (white matte) kept for reference ---
     # img_np = _rgb_over_white
     # --- end ORIGINAL ---
+    # --- NEW: alpha-aware RGB using median color along interior boundary as matte ---
     base_rgba = base_image.convert("RGBA")
+    _arr = np.array(base_rgba).astype(np.float32)              # (H,W,4), RGB in 0..255, A in 0..255
+    _rgb = _arr[..., :3]                                       # (H,W,3)
+    _alpha8 = _arr[..., 3].astype(np.uint8)                    # (H,W) uint8 alpha
+    _a = (_alpha8.astype(np.float32) / 255.0)[..., None]       # (H,W,1) float alpha
+    # Build a foreground mask from alpha (slightly strict to avoid wispy edges)
+    _fg_mask = (_alpha8 > 128).astype(np.uint8) * 255          # (H,W) 0/255
+    # Morphological interior boundary: foreground minus a 1-iteration erosion
+    _k = np.ones((3, 3), np.uint8)
+    _eroded = cv2.erode(_fg_mask, _k, iterations=1)
+    _boundary = cv2.bitwise_and(_fg_mask, cv2.bitwise_not(_eroded))  # thin interior ring
+    # If boundary is too thin/few pixels, widen the ring via morphological gradient
     if int((_boundary > 0).sum()) < 100:
+        _dil = cv2.dilate(_fg_mask, _k, iterations=2)
+        _ero = cv2.erode(_fg_mask, _k, iterations=2)
+        _boundary = cv2.subtract(_dil, _ero)
     _idx = (_boundary > 0)
     if not np.any(_idx):
+        # Fallback: use entire foreground if boundary not found
         _idx = (_fg_mask > 0)
+    # Compute median color over the selected boundary pixels (in 0..255 space)
     _median_color = np.median(_rgb[_idx], axis=0) if np.any(_idx) else np.array([255.0, 255.0, 255.0], dtype=np.float32)
+    _median_color = _median_color.reshape(1, 1, 3)             # (1,1,3)
+    # Composite RGB over median matte (avoid introducing black/white bias)
     _rgb_over_matte = _rgb * _a + (1.0 - _a) * _median_color
     _rgb_over_matte = np.clip(_rgb_over_matte, 0.0, 255.0).astype(np.uint8)
     img_np = _rgb_over_matte
     # --- end NEW ---
+    # Prepare tensor
     img_resized = img_pil.resize((384, 384))
     img_tensor = torch.from_numpy(np.array(img_resized)).permute(2, 0, 1).unsqueeze(0).float() / 255.0
     mean = torch.as_tensor([0.5, 0.5, 0.5], device=img_tensor.device).view(1, 3, 1, 1)
     model = SimpleDPT(backbone_name='vit_base_patch16_384').to(device)
     model.eval()
+    # Depth inference
     with torch.no_grad():
         target_size = img_pil.size[::-1]
         depth_map = model(img_tensor.to(device), target_size=target_size)
     # Normalize depth
     depth_vis = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
+    # Normal map
     normal_map = depth_to_normal(depth_vis)
+    # Shading map (CLAHE)
     img_lab = cv2.cvtColor(img_np, cv2.COLOR_RGB2LAB)
     l_channel, _, _ = cv2.split(img_lab)
     clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
     l_clahe = clahe.apply(l_channel)
     shading_map = l_clahe / 255.0
+    # Tile pattern
     pattern_np = np.array(pattern_image.convert("RGB"))
     target_h, target_w = img_np.shape[:2]
+    pattern_h, pattern_w = pattern_np.shape[:2]
     pattern_tiled = np.zeros((target_h, target_w, 3), dtype=np.uint8)
+    for y in range(0, target_h, pattern_h):
+        for x in range(0, target_w, pattern_w):
+            end_y = min(y + pattern_h, target_h)
+            end_x = min(x + pattern_w, target_w)
+            pattern_tiled[y:end_y, x:end_x] = pattern_np[0:(end_y - y), 0:(end_x - x)]
+    # Blend pattern
+    normal_map_loaded = normal_map.astype(np.float32)
+    shading_map_loaded = np.stack([shading_map] * 3, axis=-1)
+    alpha = 0.7
+    blended_shading = alpha * shading_map_loaded + (1 - alpha)
     pattern_folded = pattern_tiled.astype(np.float32) / 255.0 * blended_shading
+    normal_boost = 0.5 + 0.5 * normal_map_loaded[..., 2:3]
     pattern_folded *= normal_boost
     pattern_folded = np.clip(pattern_folded, 0, 1)
     # ==========================================================
+    # Background removal with post-processing (no duplicate blur)
     # ==========================================================
     buf = BytesIO()
     base_image.save(buf, format="PNG")
     base_bytes = buf.getvalue()
+    # Get RGBA from bgrem
     result_no_bg = bgrem_remove(base_bytes)
     mask_img = Image.open(BytesIO(result_no_bg)).convert("RGBA")
+    # Extract alpha and clean edges
+    mask_alpha = np.array(mask_img)[:, :, 3].astype(np.float32) / 255.0
+    # 1. Slightly stronger shrink (balanced)
+    k = 5
+    kernel = np.ones((k, k), np.uint8)   # slightly larger kernel
+    mask_binary = (mask_alpha > k/100).astype(np.uint8) * 255  # slightly stricter threshold
+    mask_eroded = cv2.erode(mask_binary, kernel, iterations=3)  # balanced erosion
+    # 2. Feather edges (blur)
+    mask_blurred = cv2.GaussianBlur(mask_eroded, (15, 15), sigmaX=3, sigmaY=3)
+    # 3. Normalize
+    mask_blurred = mask_blurred.astype(np.float32) / 255.0
+    # ================================
+    # NEW: SEAM-FIX UPSTREAM (3 steps)
+    # ================================
+    # (A) MASK EXPANSION / OVERLAP: expand slightly to ensure overlap across seams
+    overlap_iters = 2                # <-- tune: 1..3 (px-ish with 5x5 kernel)
+    # mask_expanded = cv2.dilate(mask_eroded, kernel, iterations=overlap_iters)   # old idea
+    # --- Better: expand the FLOAT feathered mask to preserve soft edge continuity ---
+    _mask_float = (mask_blurred * 255).astype(np.uint8)
+    _mask_expanded_u8 = cv2.dilate(_mask_float, kernel, iterations=overlap_iters)
+    mask_expanded = _mask_expanded_u8.astype(np.float32) / 255.0  # [0..1]
+    # (B) FEATHER AGAIN after expansion (very light) for a smooth transition band
+    mask_expanded = cv2.GaussianBlur((mask_expanded * 255).astype(np.uint8), (7, 7), sigmaX=1.5, sigmaY=1.5)
+    mask_expanded = mask_expanded.astype(np.float32) / 255.0
+    # (C) BLEED-COLOR FILLING in premultiplied space for near-edge pixels
+    #     - Create a thin edge band where alpha is small (e.g., up to 8%)
+    edge_upper = 0.08
+    # Final RGBA
     # mask_stack = np.stack([mask_blurred] * 3, axis=-1)
     # pattern_final = pattern_folded * mask_stack
+    # --- Replace above with expanded mask for overlap ---
+    mask_stack = np.stack([mask_expanded] * 3, axis=-1)
+    pattern_final = pattern_folded * mask_stack  # premultiplied RGB (color * alpha) with overlap
+    #     - Dilate premultiplied RGB slightly so edge pixels borrow nearby garment color
+    bleed_iters = 2               # <-- tune: 1..3
+    _kernel_bleed = np.ones((3, 3), np.uint8)
+    _premul_u8 = (pattern_final * 255).astype(np.uint8)
+    _premul_bleed = cv2.dilate(_premul_u8, _kernel_bleed, iterations=bleed_iters).astype(np.float32) / 255.0
+    #     - Replace only in the very thin edge band (alpha between 0 and edge_upper)
+    edge_band = (mask_expanded > 0.0) & (mask_expanded <= edge_upper)
     if np.any(edge_band):
+        pattern_final[edge_band] = _premul_bleed[edge_band]
+    # ================================
+    # END NEW: SEAM-FIX UPSTREAM
+    # ================================
+    # Premultiplied → Straight alpha
     eps = 1e-6
+    # _alpha = np.clip(mask_blurred, eps, 1.0)
+    # --- Use the expanded mask for export, with a small alpha floor to hide hairlines ---
+    alpha_floor = 0.02  # 2% floor; increase to 0.03 if a faint line persists
+    _alpha = np.clip(mask_expanded, max(alpha_floor, eps), 1.0)
+    _alpha3 = _alpha[..., None]
+    rgb_straight = np.clip(pattern_final / _alpha3, 0.0, 1.0)
     pattern_rgb = (rgb_straight * 255).astype(np.uint8)
+    alpha_channel = (_alpha * 255).astype(np.uint8)
     pattern_rgba = np.dstack((pattern_rgb, alpha_channel))
     return Image.fromarray(pattern_rgba, mode="RGBA")
 # ===============================
 # WRAPPER: ACCEPT BYTES OR BASE64
 # ===============================