Spaces:

lukeafullard
/

ImageProcessing

Running

App Files Files Community

lukeafullard committed 29 days ago

Commit

17ddc19

verified ·

1 Parent(s): 5b003bf

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +57 -43

src/streamlit_app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import streamlit as st
 from PIL import Image, ImageEnhance
 import torch
-import torch.nn.functional as F
 from torchvision import transforms
 from transformers import AutoModelForImageSegmentation, AutoImageProcessor, Swin2SRForImageSuperResolution
 import io
@@ -15,6 +14,7 @@ st.set_page_config(layout="wide", page_title="AI Image Lab")
 @st.cache_resource
 def load_rembg_model():
     """Loads RMBG-1.4 for Background Removal."""
     model = AutoModelForImageSegmentation.from_pretrained("briaai/RMBG-1.4", trust_remote_code=True)
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.to(device)
@@ -34,9 +34,42 @@ def load_upscaler(scale=2):
 # --- 2. PROCESSING FUNCTIONS ---
 def safe_rembg_inference(model, image, device):
     """
-    Robust inference for RMBG-1.4 that finds the correct mask tensor.
     """
     w, h = image.size
@@ -52,34 +85,31 @@ def safe_rembg_inference(model, image, device):
     with torch.no_grad():
         outputs = model(input_images)
-    # --- FIX START ---
-    result_tensor = None
-    # Priority 1: Check for explicit 'logits' attribute (Standard Hugging Face)
-    if hasattr(outputs, "logits"):
-        result_tensor = outputs.logits
-    # Priority 2: Iterate through list/tuple to find the 1-channel mask
-    elif isinstance(outputs, (list, tuple)):
-        for tensor in outputs:
-            # We are looking for shape [Batch, 1, Height, Width]
-            if isinstance(tensor, torch.Tensor) and tensor.dim() == 4 and tensor.shape[1] == 1:
-                result_tensor = tensor
-                break
-        # Fallback: If no 1-channel tensor found, take the first element
-        if result_tensor is None:
             result_tensor = outputs[0]
-    # Priority 3: It's already a tensor
-    else:
-        result_tensor = outputs
-    # --- FIX END ---
     # Post-processing
-    # Squeeze removes batch dim (1, 1, 1024, 1024) -> (1024, 1024)
-    pred = result_tensor.squeeze().sigmoid().cpu()
     # Convert mask to PIL
     pred_pil = transforms.ToPILImage()(pred)
     mask = pred_pil.resize((w, h))
@@ -89,30 +119,16 @@ def safe_rembg_inference(model, image, device):
     return image
 def ai_upscale(image, processor, model):
-    """
-    Upscales RGB image using Swin2SR.
-    Note: Swin2SR only works on RGB. If image is RGBA, we must handle Alpha separately.
-    """
-    # 1. Handle Alpha Channel (if exists)
     if image.mode == 'RGBA':
-        # Split RGB and Alpha
         r, g, b, a = image.split()
         rgb_image = Image.merge('RGB', (r, g, b))
-        # Upscale RGB using AI
         upscaled_rgb = run_swin_inference(rgb_image, processor, model)
-        # Upscale Alpha using standard interpolation (AI models don't predict alpha)
-        # We resize alpha to match the new RGB size
         upscaled_a = a.resize(upscaled_rgb.size, Image.Resampling.LANCZOS)
-        # Recombine
         return Image.merge('RGBA', (*upscaled_rgb.split(), upscaled_a))
     else:
         return run_swin_inference(image, processor, model)
 def run_swin_inference(image, processor, model):
-    """Helper to run the actual Swin2SR inference on an RGB image."""
     inputs = processor(image, return_tensors="pt")
     with torch.no_grad():
         outputs = model(**inputs)
@@ -131,7 +147,7 @@ def convert_image_to_bytes(img):
 def main():
     st.title("✨ AI Image Lab: Robust Edition")
-    st.markdown("Features: **RMBG-1.4 (No ONNX)** | **Swin2SR (Upscaling)** | **Geometry**")
     # --- Sidebar ---
     st.sidebar.header("1. Background")
@@ -148,11 +164,9 @@ def main():
     if uploaded_file is not None:
         image = Image.open(uploaded_file).convert("RGB")
-        # Create a working copy
         processed_image = image.copy()
-        # 1. Remove Background (Do this first so we have the mask)
         if remove_bg:
             st.info("Loading RMBG Model...")
             try:

 import streamlit as st
 from PIL import Image, ImageEnhance
 import torch
 from torchvision import transforms
 from transformers import AutoModelForImageSegmentation, AutoImageProcessor, Swin2SRForImageSuperResolution
 import io
 @st.cache_resource
 def load_rembg_model():
     """Loads RMBG-1.4 for Background Removal."""
+    # We use 'briaai/RMBG-1.4'
     model = AutoModelForImageSegmentation.from_pretrained("briaai/RMBG-1.4", trust_remote_code=True)
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.to(device)
 # --- 2. PROCESSING FUNCTIONS ---
+def find_mask_tensor(output):
+    """
+    Recursively searches any nested structure (list, tuple, dict, object)
+    to find the first Tensor that looks like a mask (1 channel).
+    """
+    # 1. A tensor is a leaf: accept it only if its shape looks like a mask.
+    if isinstance(output, torch.Tensor):
+        # Accepted: 4-D with shape[1] == 1, or 3-D with shape[0] == 1
+        # (presumably [Batch, 1, H, W] and [1, H, W] -- confirm against the model output).
+        if output.dim() == 4 and output.shape[1] == 1:
+            return output
+        elif output.dim() == 3 and output.shape[0] == 1:
+            return output
+        # Any other tensor (different rank, or size != 1 on that axis) is rejected.
+        return None
+    # 2. Anything exposing .items (dict-like) is searched value by value; first match wins.
+    if hasattr(output, "items"):
+        for val in output.values():
+            found = find_mask_tensor(val)
+            if found is not None: return found
+    # Otherwise, an object with a .logits attribute: only that attribute is searched.
+    elif hasattr(output, "logits"):
+        return find_mask_tensor(output.logits)
+    # 3. Lists and tuples are searched element by element; first match wins.
+    elif isinstance(output, (list, tuple)):
+        for item in output:
+            found = find_mask_tensor(item)
+            if found is not None: return found
+    return None
 def safe_rembg_inference(model, image, device):
     """
+    Robust inference for RMBG-1.4 using Deep Search.
     """
     w, h = image.size
     with torch.no_grad():
         outputs = model(input_images)
+    # --- DEEP SEARCH FOR MASK ---
+    result_tensor = find_mask_tensor(outputs)
+    if result_tensor is None:
+        # Fallback: If deep search failed, try just grabbing the first tensor found
+        # (Even if dimensions look weird, it's better than crashing)
+        if isinstance(outputs, (list, tuple)):
             result_tensor = outputs[0]
+        else:
+            result_tensor = outputs
     # Post-processing
+    # Ensure it's a tensor before operations
+    if not isinstance(result_tensor, torch.Tensor):
+        # If we still have a list here, we take the first element blindly
+        if isinstance(result_tensor, (list, tuple)):
+             result_tensor = result_tensor[0]
+    pred = result_tensor.squeeze().cpu()
+    # Sometimes output is already sigmoid, sometimes logits.
+    # If values are > 1 or < 0, apply sigmoid.
+    if pred.max() > 1 or pred.min() < 0:
+        pred = pred.sigmoid()
     # Convert mask to PIL
     pred_pil = transforms.ToPILImage()(pred)
     mask = pred_pil.resize((w, h))
     return image
 def ai_upscale(image, processor, model):
     """
     Upscale a PIL image via run_swin_inference, keeping any alpha channel.

     Non-RGBA images are passed to run_swin_inference unchanged. For RGBA
     images the colour bands are upscaled by run_swin_inference, while the
     alpha band is resized with Lanczos resampling to the upscaled size and
     then merged back in.
     """
     # Guard clause: anything without an alpha band goes straight through.
     if image.mode != 'RGBA':
         return run_swin_inference(image, processor, model)

     red, green, blue, alpha = image.split()
     colour_only = Image.merge('RGB', (red, green, blue))
     enlarged_colour = run_swin_inference(colour_only, processor, model)

     # The alpha band must match the size of the upscaled colour image.
     enlarged_alpha = alpha.resize(enlarged_colour.size, Image.Resampling.LANCZOS)

     bands = (*enlarged_colour.split(), enlarged_alpha)
     return Image.merge('RGBA', bands)
 def run_swin_inference(image, processor, model):
     inputs = processor(image, return_tensors="pt")
     with torch.no_grad():
         outputs = model(**inputs)
 def main():
     st.title("✨ AI Image Lab: Robust Edition")
+    st.markdown("Features: **RMBG-1.4 (Pure PyTorch)** | **Swin2SR (Upscaling)** | **Geometry**")
     # --- Sidebar ---
     st.sidebar.header("1. Background")
     if uploaded_file is not None:
         image = Image.open(uploaded_file).convert("RGB")
         processed_image = image.copy()
+        # 1. Background
         if remove_bg:
             st.info("Loading RMBG Model...")
             try: