Spaces:

lukeafullard
/

ImageProcessing

Running

lukeafullard committed 29 days ago

Commit

5b003bf

verified ·

1 Parent(s): 169cdb3

Update src/streamlit_app.py

Files changed (1) hide show

src/streamlit_app.py CHANGED Viewed

@@ -36,7 +36,7 @@ def load_upscaler(scale=2):
 def safe_rembg_inference(model, image, device):
     """
-    Robust inference for RMBG-1.4 that handles different output formats.
     """
     w, h = image.size
@@ -52,22 +52,33 @@ def safe_rembg_inference(model, image, device):
     with torch.no_grad():
         outputs = model(input_images)
-    # FIX: Handle List vs Tuple vs Tensor output
-    # BiRefNet usually returns a list/tuple of tensors.
-    # The output we want is usually the LAST element or the FIRST depending on version.
-    # We check if 'outputs' is a sequence (list/tuple) and grab the tensor.
-    if isinstance(outputs, (list, tuple)):
-        # We assume the last element is the high-res prediction for RMBG-1.4
-        result_tensor = outputs[-1]
-        # Double check: if the result is still a list (nested), grab the first item
-        if isinstance(result_tensor, (list, tuple)):
-            result_tensor = result_tensor[0]
     else:
         result_tensor = outputs
     # Post-processing
-    pred = result_tensor.sigmoid().cpu()[0].squeeze()
     # Convert mask to PIL
     pred_pil = transforms.ToPILImage()(pred)

 def safe_rembg_inference(model, image, device):
     """
+    Robust inference for RMBG-1.4 that finds the correct mask tensor.
     """
     w, h = image.size
     with torch.no_grad():
         outputs = model(input_images)
+    # --- FIX START ---
+    result_tensor = None
+    # Priority 1: Check for explicit 'logits' attribute (Standard Hugging Face)
+    if hasattr(outputs, "logits"):
+        result_tensor = outputs.logits
+    # Priority 2: Iterate through list/tuple to find the 1-channel mask
+    elif isinstance(outputs, (list, tuple)):
+        for tensor in outputs:
+            # We are looking for shape [Batch, 1, Height, Width]
+            if isinstance(tensor, torch.Tensor) and tensor.dim() == 4 and tensor.shape[1] == 1:
+                result_tensor = tensor
+                break
+        # Fallback: If no 1-channel tensor found, take the first element
+        if result_tensor is None:
+            result_tensor = outputs[0]
+    # Priority 3: It's already a tensor
     else:
         result_tensor = outputs
+    # --- FIX END ---
     # Post-processing
+    # Squeeze removes batch dim (1, 1, 1024, 1024) -> (1024, 1024)
+    pred = result_tensor.squeeze().sigmoid().cpu()
     # Convert mask to PIL
     pred_pil = transforms.ToPILImage()(pred)