Spaces:

enoky
/

2D-to-Stereo-3D

Running

App Files Community

enoky committed 15 days ago

Commit

79bdec3

verified ·

1 Parent(s): db1a689

add mask dilation

Browse files

Files changed (1) hide show

app.py +12 -3

app.py CHANGED Viewed

@@ -21,7 +21,6 @@ def load_models():
     print("Loading LaMa Inpainting Model...")
     # 2. LaMa Inpainting Model (TorchScript)
     # We download the .pt file directly from a repository that hosts the compiled JIT version.
-    # This avoids dealing with .ckpt files and source code dependencies.
     try:
         model_path = hf_hub_download(repo_id="fashn-ai/LaMa", filename="big-lama.pt")
@@ -89,13 +88,20 @@ def run_local_lama(image_bgr, mask_float):
     image_bgr: HxWx3 uint8 numpy array
     mask_float: HxW float32 numpy array (1.0 = hole, 0.0 = valid)
     """
     # 1. Resize to be divisible by 8 (LaMa requirement)
     h, w = image_bgr.shape[:2]
     new_h = (h // 8) * 8
     new_w = (w // 8) * 8
     img_resized = cv2.resize(image_bgr, (new_w, new_h))
-    mask_resized = cv2.resize(mask_float, (new_w, new_h), interpolation=cv2.INTER_NEAREST)
     # 2. Convert to Torch Tensors
     # Image: (1, 3, H, W), RGB, 0-1
@@ -104,7 +110,7 @@ def run_local_lama(image_bgr, mask_float):
     img_t = img_t[:, [2, 1, 0], :, :]
     # Mask: (1, 1, H, W), 0-1
-    mask_t = torch.from_numpy(mask_resized).float().unsqueeze(0).unsqueeze(0)
     # Binary threshold just in case
     mask_t = (mask_t > 0.5).float()
@@ -112,6 +118,9 @@ def run_local_lama(image_bgr, mask_float):
     mask_t = mask_t.to(device)
     # 3. Inference
     inpainted_t = lama_model(img_t, mask_t)
     # 4. Post-process

     print("Loading LaMa Inpainting Model...")
     # 2. LaMa Inpainting Model (TorchScript)
     # We download the .pt file directly from a repository that hosts the compiled JIT version.
     try:
         model_path = hf_hub_download(repo_id="fashn-ai/LaMa", filename="big-lama.pt")
     image_bgr: HxWx3 uint8 numpy array
     mask_float: HxW float32 numpy array (1.0 = hole, 0.0 = valid)
     """
+    # 0. Dilate Mask (Fixes smearing/streaking)
+    # We expand the "hole" area (values of 1) to cover the jagged edges
+    # created by the pixel shift. This forces LaMa to regenerate the boundary.
+    kernel = np.ones((5, 5), np.uint8)
+    mask_uint8 = (mask_float * 255).astype(np.uint8)
+    mask_dilated = cv2.dilate(mask_uint8, kernel, iterations=1)
     # 1. Resize to be divisible by 8 (LaMa requirement)
     h, w = image_bgr.shape[:2]
     new_h = (h // 8) * 8
     new_w = (w // 8) * 8
     img_resized = cv2.resize(image_bgr, (new_w, new_h))
+    mask_resized = cv2.resize(mask_dilated, (new_w, new_h), interpolation=cv2.INTER_NEAREST)
     # 2. Convert to Torch Tensors
     # Image: (1, 3, H, W), RGB, 0-1
     img_t = img_t[:, [2, 1, 0], :, :]
     # Mask: (1, 1, H, W), 0-1
+    mask_t = torch.from_numpy(mask_resized).float().unsqueeze(0).unsqueeze(0) / 255.0
     # Binary threshold just in case
     mask_t = (mask_t > 0.5).float()
     mask_t = mask_t.to(device)
     # 3. Inference
+    # LaMa expects the image to be masked (zeroed out) in the hole regions for best results
+    img_t = img_t * (1 - mask_t)
     inpainted_t = lama_model(img_t, mask_t)
     # 4. Post-process