Spaces:

lukeafullard
/

ImageProcessing

Running

App Files Community

lukeafullard committed 30 days ago

Commit

a533ab1

verified ·

1 Parent(s): d21ed78

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +96 -68

src/streamlit_app.py CHANGED Viewed

@@ -1,12 +1,12 @@
 import streamlit as st
-from PIL import Image, ImageEnhance
 import torch
 from torchvision import transforms
 from transformers import AutoModelForImageSegmentation, AutoImageProcessor, Swin2SRForImageSuperResolution
 import io
 import numpy as np
-# Page Configuration g
 st.set_page_config(layout="wide", page_title="AI Image Lab")
 # --- 1. MODEL LOADING (Cached) ---
@@ -14,7 +14,6 @@ st.set_page_config(layout="wide", page_title="AI Image Lab")
 @st.cache_resource
 def load_rembg_model():
     """Loads RMBG-1.4 for Background Removal."""
-    # We use 'briaai/RMBG-1.4'
     model = AutoModelForImageSegmentation.from_pretrained("briaai/RMBG-1.4", trust_remote_code=True)
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.to(device)
@@ -35,45 +34,30 @@ def load_upscaler(scale=2):
 # --- 2. PROCESSING FUNCTIONS ---
 def find_mask_tensor(output):
-    """
-    Recursively searches any nested structure (list, tuple, dict, object)
-    to find the first Tensor that looks like a mask (1 channel).
-    """
-    # 1. If it's a Tensor, check if it's the mask we want
     if isinstance(output, torch.Tensor):
-        # We look for shape [Batch, 1, H, W] or [1, H, W]
-        # It must have 1 channel (index 1 for 4D, index 0 for 3D)
         if output.dim() == 4 and output.shape[1] == 1:
             return output
         elif output.dim() == 3 and output.shape[0] == 1:
             return output
-        # If it has > 1 channels (e.g. 64), it's a feature map, ignore it.
         return None
-    # 2. If it's a Dict/ModelOutput (like .logits), check values
-    if hasattr(output, "items"):
-        for val in output.values():
-            found = find_mask_tensor(val)
-            if found is not None: return found
-    # Special case for Hugging Face model outputs with attributes
-    elif hasattr(output, "logits"):
         return find_mask_tensor(output.logits)
-    # 3. If it's a List or Tuple, iterate through elements
     elif isinstance(output, (list, tuple)):
         for item in output:
             found = find_mask_tensor(item)
             if found is not None: return found
     return None
 def safe_rembg_inference(model, image, device):
-    """
-    Robust inference for RMBG-1.4 using Deep Search.
-    """
     w, h = image.size
-    # Preprocessing
     transform_image = transforms.Compose([
         transforms.Resize((1024, 1024)),
         transforms.ToTensor(),
@@ -81,44 +65,37 @@ def safe_rembg_inference(model, image, device):
     ])
     input_images = transform_image(image).unsqueeze(0).to(device)
-    # Inference
     with torch.no_grad():
         outputs = model(input_images)
-    # --- DEEP SEARCH FOR MASK ---
     result_tensor = find_mask_tensor(outputs)
     if result_tensor is None:
-        # Fallback: If deep search failed, try just grabbing the first tensor found
-        # (Even if dimensions look weird, it's better than crashing)
-        if isinstance(outputs, (list, tuple)):
-            result_tensor = outputs[0]
-        else:
-            result_tensor = outputs
-    # Post-processing
-    # Ensure it's a tensor before operations
     if not isinstance(result_tensor, torch.Tensor):
-        # If we still have a list here, we take the first element blindly
-        if isinstance(result_tensor, (list, tuple)):
-             result_tensor = result_tensor[0]
     pred = result_tensor.squeeze().cpu()
-    # Sometimes output is already sigmoid, sometimes logits.
-    # If values are > 1 or < 0, apply sigmoid.
-    if pred.max() > 1 or pred.min() < 0:
-        pred = pred.sigmoid()
-    # Convert mask to PIL
     pred_pil = transforms.ToPILImage()(pred)
     mask = pred_pil.resize((w, h))
-    # Apply mask
     image.putalpha(mask)
     return image
-def ai_upscale(image, processor, model):
     if image.mode == 'RGBA':
         r, g, b, a = image.split()
         rgb_image = Image.merge('RGB', (r, g, b))
@@ -128,15 +105,46 @@ def ai_upscale(image, processor, model):
     else:
         return run_swin_inference(image, processor, model)
-def run_swin_inference(image, processor, model):
-    inputs = processor(image, return_tensors="pt")
-    with torch.no_grad():
-        outputs = model(**inputs)
-    output = outputs.reconstruction.data.squeeze().float().cpu().clamp_(0, 1).numpy()
-    output = np.moveaxis(output, 0, -1)
-    output = (output * 255.0).round().astype(np.uint8)
-    return Image.fromarray(output)
 def convert_image_to_bytes(img):
     buf = io.BytesIO()
@@ -146,15 +154,15 @@ def convert_image_to_bytes(img):
 # --- 3. MAIN APP ---
 def main():
-    st.title("✨ AI Image Lab: Robust Edition")
-    st.markdown("Features: **RMBG-1.4 (Pure PyTorch)** | **Swin2SR (Upscaling)** | **Geometry**")
     # --- Sidebar ---
     st.sidebar.header("1. Background")
     remove_bg = st.sidebar.checkbox("Remove Background", value=False)
     st.sidebar.header("2. AI Upscaling")
-    upscale_mode = st.sidebar.radio("Magnification", ["None", "2x (Fast)", "4x (Slow)"])
     st.sidebar.header("3. Geometry")
     rotate_angle = st.sidebar.slider("Rotate", -180, 180, 0, 1)
@@ -179,13 +187,33 @@ def main():
         # 2. Upscaling
         if upscale_mode != "None":
             scale = 4 if "4x" in upscale_mode else 2
-            st.info(f"Loading Swin2SR x{scale} Model...")
-            try:
-                processor, upscaler = load_upscaler(scale)
-                with st.spinner(f"Upscaling x{scale}..."):
-                    processed_image = ai_upscale(processed_image, processor, upscaler)
-            except Exception as e:
-                st.error(f"Upscaling Failed: {e}")
         # 3. Rotation
         if rotate_angle != 0:

 import streamlit as st
+from PIL import Image
 import torch
 from torchvision import transforms
 from transformers import AutoModelForImageSegmentation, AutoImageProcessor, Swin2SRForImageSuperResolution
 import io
 import numpy as np
+# Page Configuration
 st.set_page_config(layout="wide", page_title="AI Image Lab")
 # --- 1. MODEL LOADING (Cached) ---
 @st.cache_resource
 def load_rembg_model():
     """Loads RMBG-1.4 for Background Removal."""
     model = AutoModelForImageSegmentation.from_pretrained("briaai/RMBG-1.4", trust_remote_code=True)
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.to(device)
 # --- 2. PROCESSING FUNCTIONS ---
 def find_mask_tensor(output):
+    """Recursively finds the mask tensor in complex model outputs."""
     if isinstance(output, torch.Tensor):
         if output.dim() == 4 and output.shape[1] == 1:
             return output
         elif output.dim() == 3 and output.shape[0] == 1:
             return output
         return None
+    if hasattr(output, "logits"):
         return find_mask_tensor(output.logits)
     elif isinstance(output, (list, tuple)):
         for item in output:
             found = find_mask_tensor(item)
             if found is not None: return found
+    elif hasattr(output, "items"):
+        for val in output.values():
+            found = find_mask_tensor(val)
+            if found is not None: return found
     return None
 def safe_rembg_inference(model, image, device):
+    """Robust background removal inference."""
     w, h = image.size
     transform_image = transforms.Compose([
         transforms.Resize((1024, 1024)),
         transforms.ToTensor(),
     ])
     input_images = transform_image(image).unsqueeze(0).to(device)
     with torch.no_grad():
         outputs = model(input_images)
     result_tensor = find_mask_tensor(outputs)
     if result_tensor is None:
+        result_tensor = outputs[0] if isinstance(outputs, (list, tuple)) else outputs
     if not isinstance(result_tensor, torch.Tensor):
+         if isinstance(result_tensor, (list, tuple)): result_tensor = result_tensor[0]
     pred = result_tensor.squeeze().cpu()
+    if pred.max() > 1 or pred.min() < 0: pred = pred.sigmoid()
     pred_pil = transforms.ToPILImage()(pred)
     mask = pred_pil.resize((w, h))
     image.putalpha(mask)
     return image
+def run_swin_inference(image, processor, model):
+    """Atomic inference for a single image/tile."""
+    inputs = processor(image, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model(**inputs)
+    output = outputs.reconstruction.data.squeeze().float().cpu().clamp_(0, 1).numpy()
+    output = np.moveaxis(output, 0, -1)
+    output = (output * 255.0).round().astype(np.uint8)
+    return Image.fromarray(output)
+def upscale_image_logic(image, processor, model):
+    """Handles RGBA vs RGB logic for a single chunk."""
     if image.mode == 'RGBA':
         r, g, b, a = image.split()
         rgb_image = Image.merge('RGB', (r, g, b))
     else:
         return run_swin_inference(image, processor, model)
+def tiled_upscale(image, processor, model, scale_factor, progress_bar):
+    """
+    Splits image into a 2x2 grid, upscales each tile, and updates progress bar.
+    """
+    rows, cols = 2, 2  # Split into 4 tiles
+    w, h = image.size
+    # Calculate tile sizes
+    tile_w = w // cols
+    tile_h = h // rows
+    full_image = Image.new(image.mode, (w * scale_factor, h * scale_factor))
+    total_tiles = rows * cols
+    count = 0
+    for y in range(rows):
+        for x in range(cols):
+            # Define crop box
+            left = x * tile_w
+            upper = y * tile_h
+            # Ensure the last tile takes the remaining pixels (fixes rounding errors)
+            right = w if x == cols - 1 else (x + 1) * tile_w
+            lower = h if y == rows - 1 else (y + 1) * tile_h
+            # Crop
+            tile = image.crop((left, upper, right, lower))
+            # Upscale the tile
+            upscaled_tile = upscale_image_logic(tile, processor, model)
+            # Paste into new canvas
+            paste_x = left * scale_factor
+            paste_y = upper * scale_factor
+            full_image.paste(upscaled_tile, (paste_x, paste_y))
+            # Update Progress
+            count += 1
+            progress_bar.progress(count / total_tiles, text=f"Upscaling Tile {count}/{total_tiles}...")
+    return full_image
 def convert_image_to_bytes(img):
     buf = io.BytesIO()
 # --- 3. MAIN APP ---
 def main():
+    st.title("✨ AI Image Lab: Tiled Edition")
+    st.markdown("Features: **RMBG-1.4** | **Swin2SR (Tiled)** | **Geometry**")
     # --- Sidebar ---
     st.sidebar.header("1. Background")
     remove_bg = st.sidebar.checkbox("Remove Background", value=False)
     st.sidebar.header("2. AI Upscaling")
+    upscale_mode = st.sidebar.radio("Magnification", ["None", "2x (Fast)", "4x (Slow - Tiled)"])
     st.sidebar.header("3. Geometry")
     rotate_angle = st.sidebar.slider("Rotate", -180, 180, 0, 1)
         # 2. Upscaling
         if upscale_mode != "None":
             scale = 4 if "4x" in upscale_mode else 2
+            # If 4x, use the Progress Bar + Tiling method
+            if scale == 4:
+                st.info(f"Loading Swin2SR x{scale} Model...")
+                try:
+                    processor, upscaler = load_upscaler(scale)
+                    # Create Progress Bar
+                    my_bar = st.progress(0, text="Starting Tiled Upscaling...")
+                    processed_image = tiled_upscale(processed_image, processor, upscaler, scale, my_bar)
+                    # Clear bar when done
+                    my_bar.empty()
+                except Exception as e:
+                    st.error(f"Upscaling Failed: {e}")
+            # If 2x, keep it simple (it's fast enough)
+            else:
+                st.info(f"Loading Swin2SR x{scale} Model...")
+                try:
+                    processor, upscaler = load_upscaler(scale)
+                    with st.spinner("Upscaling (2x)..."):
+                        processed_image = upscale_image_logic(processed_image, processor, upscaler)
+                except Exception as e:
+                    st.error(f"Upscaling Failed: {e}")
         # 3. Rotation
         if rotate_angle != 0: