lukeafullard committed on
Commit
1c883f5
·
verified ·
1 Parent(s): a533ab1

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +139 -100
src/streamlit_app.py CHANGED
@@ -5,15 +5,16 @@ from torchvision import transforms
5
  from transformers import AutoModelForImageSegmentation, AutoImageProcessor, Swin2SRForImageSuperResolution
6
  import io
7
  import numpy as np
 
8
 
9
  # Page Configuration
10
  st.set_page_config(layout="wide", page_title="AI Image Lab")
11
 
12
- # --- 1. MODEL LOADING (Cached) ---
 
13
 
14
  @st.cache_resource
15
  def load_rembg_model():
16
- """Loads RMBG-1.4 for Background Removal."""
17
  model = AutoModelForImageSegmentation.from_pretrained("briaai/RMBG-1.4", trust_remote_code=True)
18
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
19
  model.to(device)
@@ -21,17 +22,15 @@ def load_rembg_model():
21
 
22
  @st.cache_resource
23
  def load_upscaler(scale=2):
24
- """Loads Swin2SR for Super-Resolution (2x or 4x)."""
25
  if scale == 4:
26
  model_id = "caidas/swin2SR-classical-sr-x4-63"
27
  else:
28
  model_id = "caidas/swin2SR-classical-sr-x2-64"
29
-
30
  processor = AutoImageProcessor.from_pretrained(model_id)
31
  model = Swin2SRForImageSuperResolution.from_pretrained(model_id)
32
  return processor, model
33
 
34
- # --- 2. PROCESSING FUNCTIONS ---
35
 
36
  def find_mask_tensor(output):
37
  """Recursively finds the mask tensor in complex model outputs."""
@@ -41,22 +40,58 @@ def find_mask_tensor(output):
41
  elif output.dim() == 3 and output.shape[0] == 1:
42
  return output
43
  return None
44
-
45
  if hasattr(output, "logits"):
46
  return find_mask_tensor(output.logits)
47
  elif isinstance(output, (list, tuple)):
48
  for item in output:
49
  found = find_mask_tensor(item)
50
  if found is not None: return found
51
- elif hasattr(output, "items"):
52
- for val in output.values():
53
- found = find_mask_tensor(val)
54
- if found is not None: return found
55
-
56
  return None
57
 
58
- def safe_rembg_inference(model, image, device):
59
- """Robust background removal inference."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  w, h = image.size
61
  transform_image = transforms.Compose([
62
  transforms.Resize((1024, 1024)),
@@ -65,13 +100,15 @@ def safe_rembg_inference(model, image, device):
65
  ])
66
  input_images = transform_image(image).unsqueeze(0).to(device)
67
 
 
68
  with torch.no_grad():
69
  outputs = model(input_images)
70
 
 
71
  result_tensor = find_mask_tensor(outputs)
72
  if result_tensor is None:
73
  result_tensor = outputs[0] if isinstance(outputs, (list, tuple)) else outputs
74
-
75
  if not isinstance(result_tensor, torch.Tensor):
76
  if isinstance(result_tensor, (list, tuple)): result_tensor = result_tensor[0]
77
 
@@ -81,143 +118,145 @@ def safe_rembg_inference(model, image, device):
81
  pred_pil = transforms.ToPILImage()(pred)
82
  mask = pred_pil.resize((w, h))
83
  image.putalpha(mask)
84
- return image
85
-
86
- def run_swin_inference(image, processor, model):
87
- """Atomic inference for a single image/tile."""
88
- inputs = processor(image, return_tensors="pt")
89
- with torch.no_grad():
90
- outputs = model(**inputs)
91
 
92
- output = outputs.reconstruction.data.squeeze().float().cpu().clamp_(0, 1).numpy()
93
- output = np.moveaxis(output, 0, -1)
94
- output = (output * 255.0).round().astype(np.uint8)
95
- return Image.fromarray(output)
96
-
97
- def upscale_image_logic(image, processor, model):
98
- """Handles RGBA vs RGB logic for a single chunk."""
99
- if image.mode == 'RGBA':
100
- r, g, b, a = image.split()
101
- rgb_image = Image.merge('RGB', (r, g, b))
102
- upscaled_rgb = run_swin_inference(rgb_image, processor, model)
103
- upscaled_a = a.resize(upscaled_rgb.size, Image.Resampling.LANCZOS)
104
- return Image.merge('RGBA', (*upscaled_rgb.split(), upscaled_a))
105
- else:
106
- return run_swin_inference(image, processor, model)
107
 
108
- def tiled_upscale(image, processor, model, scale_factor, progress_bar):
109
  """
110
- Splits image into a 2x2 grid, upscales each tile, and updates progress bar.
 
 
111
  """
112
- rows, cols = 2, 2 # Split into 4 tiles
 
 
113
  w, h = image.size
 
 
114
 
115
  # Calculate tile sizes
116
  tile_w = w // cols
117
  tile_h = h // rows
118
 
 
119
  full_image = Image.new(image.mode, (w * scale_factor, h * scale_factor))
120
  total_tiles = rows * cols
121
  count = 0
122
 
123
  for y in range(rows):
124
  for x in range(cols):
125
- # Define crop box
126
  left = x * tile_w
127
  upper = y * tile_h
128
- # Ensure the last tile takes the remaining pixels (fixes rounding errors)
129
  right = w if x == cols - 1 else (x + 1) * tile_w
130
  lower = h if y == rows - 1 else (y + 1) * tile_h
131
 
132
- # Crop
133
  tile = image.crop((left, upper, right, lower))
134
 
135
- # Upscale the tile
136
- upscaled_tile = upscale_image_logic(tile, processor, model)
137
 
138
- # Paste into new canvas
139
  paste_x = left * scale_factor
140
  paste_y = upper * scale_factor
141
  full_image.paste(upscaled_tile, (paste_x, paste_y))
142
 
143
- # Update Progress
144
- count += 1
145
- progress_bar.progress(count / total_tiles, text=f"Upscaling Tile {count}/{total_tiles}...")
 
 
 
146
 
 
 
 
 
 
147
  return full_image
148
 
149
- def convert_image_to_bytes(img):
150
- buf = io.BytesIO()
151
- img.save(buf, format="PNG")
152
- return buf.getvalue()
 
 
 
 
 
 
 
 
153
 
154
- # --- 3. MAIN APP ---
155
 
156
  def main():
157
- st.title("✨ AI Image Lab: Tiled Edition")
158
- st.markdown("Features: **RMBG-1.4** | **Swin2SR (Tiled)** | **Geometry**")
159
 
160
  # --- Sidebar ---
161
  st.sidebar.header("1. Background")
162
  remove_bg = st.sidebar.checkbox("Remove Background", value=False)
163
 
164
  st.sidebar.header("2. AI Upscaling")
165
- upscale_mode = st.sidebar.radio("Magnification", ["None", "2x (Fast)", "4x (Slow - Tiled)"])
166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  st.sidebar.header("3. Geometry")
168
  rotate_angle = st.sidebar.slider("Rotate", -180, 180, 0, 1)
169
 
170
- # --- Main ---
171
  uploaded_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg", "webp"])
172
 
173
  if uploaded_file is not None:
174
- image = Image.open(uploaded_file).convert("RGB")
175
- processed_image = image.copy()
 
176
 
177
- # 1. Background
 
 
178
  if remove_bg:
179
- st.info("Loading RMBG Model...")
180
- try:
181
- bg_model, device = load_rembg_model()
182
- with st.spinner("Removing background..."):
183
- processed_image = safe_rembg_inference(bg_model, processed_image, device)
184
- except Exception as e:
185
- st.error(f"Background Removal Failed: {e}")
186
-
187
- # 2. Upscaling
188
  if upscale_mode != "None":
189
  scale = 4 if "4x" in upscale_mode else 2
190
 
191
- # If 4x, use the Progress Bar + Tiling method
192
- if scale == 4:
193
- st.info(f"Loading Swin2SR x{scale} Model...")
194
- try:
195
- processor, upscaler = load_upscaler(scale)
196
-
197
- # Create Progress Bar
198
- my_bar = st.progress(0, text="Starting Tiled Upscaling...")
199
-
200
- processed_image = tiled_upscale(processed_image, processor, upscaler, scale, my_bar)
201
-
202
- # Clear bar when done
203
- my_bar.empty()
204
-
205
- except Exception as e:
206
- st.error(f"Upscaling Failed: {e}")
207
 
208
- # If 2x, keep it simple (it's fast enough)
209
- else:
210
- st.info(f"Loading Swin2SR x{scale} Model...")
211
- try:
212
- processor, upscaler = load_upscaler(scale)
213
- with st.spinner("Upscaling (2x)..."):
214
- processed_image = upscale_image_logic(processed_image, processor, upscaler)
215
- except Exception as e:
216
- st.error(f"Upscaling Failed: {e}")
217
-
218
- # 3. Rotation
 
219
  if rotate_angle != 0:
220
- processed_image = processed_image.rotate(rotate_angle, expand=True)
221
 
222
  # --- Display ---
223
  col1, col2 = st.columns(2)
@@ -228,14 +267,14 @@ def main():
228
 
229
  with col2:
230
  st.subheader("Result")
231
- st.image(processed_image, use_container_width=True)
232
- st.caption(f"Size: {processed_image.size}")
233
 
234
  # --- Download ---
235
  st.markdown("---")
236
  st.download_button(
237
  label="💾 Download Result (PNG)",
238
- data=convert_image_to_bytes(processed_image),
239
  file_name="processed_image.png",
240
  mime="image/png"
241
  )
 
5
  from transformers import AutoModelForImageSegmentation, AutoImageProcessor, Swin2SRForImageSuperResolution
6
  import io
7
  import numpy as np
8
+ import gc # Garbage collection for memory safety
9
 
10
  # Page Configuration
11
  st.set_page_config(layout="wide", page_title="AI Image Lab")
12
 
13
+ # --- 1. MODEL LOADING (Cached Resource) ---
14
+ # Models are loaded once and stay in memory.
15
 
16
  @st.cache_resource
17
  def load_rembg_model():
 
18
  model = AutoModelForImageSegmentation.from_pretrained("briaai/RMBG-1.4", trust_remote_code=True)
19
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
20
  model.to(device)
 
22
 
23
  @st.cache_resource
24
  def load_upscaler(scale=2):
 
25
  if scale == 4:
26
  model_id = "caidas/swin2SR-classical-sr-x4-63"
27
  else:
28
  model_id = "caidas/swin2SR-classical-sr-x2-64"
 
29
  processor = AutoImageProcessor.from_pretrained(model_id)
30
  model = Swin2SRForImageSuperResolution.from_pretrained(model_id)
31
  return processor, model
32
 
33
+ # --- 2. HELPER FUNCTIONS ---
34
 
35
  def find_mask_tensor(output):
36
  """Recursively finds the mask tensor in complex model outputs."""
 
40
  elif output.dim() == 3 and output.shape[0] == 1:
41
  return output
42
  return None
 
43
  if hasattr(output, "logits"):
44
  return find_mask_tensor(output.logits)
45
  elif isinstance(output, (list, tuple)):
46
  for item in output:
47
  found = find_mask_tensor(item)
48
  if found is not None: return found
 
 
 
 
 
49
  return None
50
 
51
+ def run_swin_inference(image, processor, model):
52
+ """Atomic inference for a single chunk."""
53
+ inputs = processor(image, return_tensors="pt")
54
+ with torch.no_grad():
55
+ outputs = model(**inputs)
56
+
57
+ output = outputs.reconstruction.data.squeeze().float().cpu().clamp_(0, 1).numpy()
58
+ output = np.moveaxis(output, 0, -1)
59
+ output = (output * 255.0).round().astype(np.uint8)
60
+ return Image.fromarray(output)
61
+
62
+ def upscale_chunk_logic(image, processor, model):
63
+ """Handles RGBA vs RGB logic for a single chunk."""
64
+ if image.mode == 'RGBA':
65
+ r, g, b, a = image.split()
66
+ rgb_image = Image.merge('RGB', (r, g, b))
67
+ upscaled_rgb = run_swin_inference(rgb_image, processor, model)
68
+ # Resize alpha to match new RGB size
69
+ upscaled_a = a.resize(upscaled_rgb.size, Image.Resampling.LANCZOS)
70
+ return Image.merge('RGBA', (*upscaled_rgb.split(), upscaled_a))
71
+ else:
72
+ return run_swin_inference(image, processor, model)
73
+
74
+ def convert_image_to_bytes(img):
75
+ buf = io.BytesIO()
76
+ img.save(buf, format="PNG")
77
+ return buf.getvalue()
78
+
79
+ # --- 3. HEAVY OPERATIONS (Cached Data) ---
80
+ # These functions cache their results. If inputs (image/settings) don't change,
81
+ # they return the previous result instantly without using RAM/CPU.
82
+
83
+ @st.cache_data(show_spinner=False)
84
+ def process_background_removal(image_bytes):
85
+ """
86
+ Removes background. Input is bytes to make it hashable for caching.
87
+ """
88
+ # Re-open image from bytes
89
+ image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
90
+
91
+ # Load model
92
+ model, device = load_rembg_model()
93
+
94
+ # Preprocessing
95
  w, h = image.size
96
  transform_image = transforms.Compose([
97
  transforms.Resize((1024, 1024)),
 
100
  ])
101
  input_images = transform_image(image).unsqueeze(0).to(device)
102
 
103
+ # Inference
104
  with torch.no_grad():
105
  outputs = model(input_images)
106
 
107
+ # Find Mask
108
  result_tensor = find_mask_tensor(outputs)
109
  if result_tensor is None:
110
  result_tensor = outputs[0] if isinstance(outputs, (list, tuple)) else outputs
111
+
112
  if not isinstance(result_tensor, torch.Tensor):
113
  if isinstance(result_tensor, (list, tuple)): result_tensor = result_tensor[0]
114
 
 
118
  pred_pil = transforms.ToPILImage()(pred)
119
  mask = pred_pil.resize((w, h))
120
  image.putalpha(mask)
 
 
 
 
 
 
 
121
 
122
+ return image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
+ def process_tiled_upscale(image, scale_factor, grid_n, progress_bar=None):
125
  """
126
+ Splits image into n*n tiles, upscales each, and merges.
127
+ This function is NOT cached directly because it uses a progress bar (UI element).
128
+ We wrap the logic inside the main loop or a separate cached function if needed.
129
  """
130
+ # Load Model
131
+ processor, model = load_upscaler(scale_factor)
132
+
133
  w, h = image.size
134
+ rows = grid_n
135
+ cols = grid_n
136
 
137
  # Calculate tile sizes
138
  tile_w = w // cols
139
  tile_h = h // rows
140
 
141
+ # Create large canvas
142
  full_image = Image.new(image.mode, (w * scale_factor, h * scale_factor))
143
  total_tiles = rows * cols
144
  count = 0
145
 
146
  for y in range(rows):
147
  for x in range(cols):
148
+ # 1. Crop
149
  left = x * tile_w
150
  upper = y * tile_h
151
+ # Handle edge pixels (ensure last tile takes remainder)
152
  right = w if x == cols - 1 else (x + 1) * tile_w
153
  lower = h if y == rows - 1 else (y + 1) * tile_h
154
 
 
155
  tile = image.crop((left, upper, right, lower))
156
 
157
+ # 2. Upscale
158
+ upscaled_tile = upscale_chunk_logic(tile, processor, model)
159
 
160
+ # 3. Paste
161
  paste_x = left * scale_factor
162
  paste_y = upper * scale_factor
163
  full_image.paste(upscaled_tile, (paste_x, paste_y))
164
 
165
+ # 4. Memory Cleanup (Crucial for 16Gi limit)
166
+ del tile
167
+ del upscaled_tile
168
+ gc.collect()
169
+ if torch.cuda.is_available():
170
+ torch.cuda.empty_cache()
171
 
172
+ # 5. Update UI
173
+ count += 1
174
+ if progress_bar:
175
+ progress_bar.progress(count / total_tiles, text=f"Processing Tile {count}/{total_tiles}...")
176
+
177
  return full_image
178
 
179
+ # Wrapper for caching the upscale result (without progress bar args)
180
+ @st.cache_data(show_spinner=False)
181
+ def cached_upscale_wrapper(image_bytes, scale_factor, grid_n):
182
+ """
183
+ This wrapper allows us to cache the upscale result.
184
+ We convert PIL->Bytes->PIL inside to ensure Streamlit can hash the input.
185
+ """
186
+ image = Image.open(io.BytesIO(image_bytes))
187
+ # We cannot pass the progress bar to a cached function,
188
+ # so we run it without the bar or handle the bar outside.
189
+ # For caching purposes, we run it 'quietly'.
190
+ return process_tiled_upscale(image, scale_factor, grid_n, progress_bar=None)
191
 
192
+ # --- 4. MAIN APP ---
193
 
194
  def main():
195
+ st.title("✨ AI Image Lab: Memory Safe")
196
+ st.markdown("Features: **RMBG-1.4** | **Swin2SR (Tiled)** | **Smart Caching**")
197
 
198
  # --- Sidebar ---
199
  st.sidebar.header("1. Background")
200
  remove_bg = st.sidebar.checkbox("Remove Background", value=False)
201
 
202
  st.sidebar.header("2. AI Upscaling")
203
+ upscale_mode = st.sidebar.radio("Magnification", ["None", "2x", "4x"])
204
 
205
+ # Grid Slider for Memory Safety
206
+ if upscale_mode != "None":
207
+ grid_n = st.sidebar.slider(
208
+ "Grid Split (Memory Saver)",
209
+ min_value=2,
210
+ max_value=8,
211
+ value=4,
212
+ help="Higher = Less RAM used, but slightly slower. If crashing, increase this!"
213
+ )
214
+ st.sidebar.info(f"Splitting image into {grid_n*grid_n} pieces.")
215
+ else:
216
+ grid_n = 2
217
+
218
  st.sidebar.header("3. Geometry")
219
  rotate_angle = st.sidebar.slider("Rotate", -180, 180, 0, 1)
220
 
221
+ # --- Main Logic ---
222
  uploaded_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg", "webp"])
223
 
224
  if uploaded_file is not None:
225
+ # Load Original
226
+ file_bytes = uploaded_file.getvalue() # Keep raw bytes for caching references
227
+ image = Image.open(io.BytesIO(file_bytes)).convert("RGB")
228
 
229
+ # --- PIPELINE START ---
230
+
231
+ # Step 1: Background Removal (Cached)
232
  if remove_bg:
233
+ with st.spinner("Removing background..."):
234
+ # We pass bytes to the cached function
235
+ processed_image = process_background_removal(file_bytes)
236
+ else:
237
+ processed_image = image
238
+
239
+ # Step 2: Upscaling (Cached manually or via wrapper)
 
 
240
  if upscale_mode != "None":
241
  scale = 4 if "4x" in upscale_mode else 2
242
 
243
+ # Convert current stage to bytes for cache key
244
+ current_stage_bytes = convert_image_to_bytes(processed_image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
+ # Check if we should use the cached wrapper or run with progress bar
247
+ # To preserve the "Progress Bar" experience while still caching, we can:
248
+ # Check if it's already in cache? Streamlit doesn't expose `is_cached`.
249
+ # We will use the cached wrapper. The downside: the first run won't show the detailed tile progress
250
+ # inside the cached function, just the spinner.
251
+
252
+ with st.spinner(f"Upscaling x{scale} ({grid_n*grid_n} tiles)..."):
253
+ processed_image = cached_upscale_wrapper(current_stage_bytes, scale, grid_n)
254
+
255
+ # Step 3: Geometry (Fast - No Caching needed, applied on top)
256
+ # This runs every time you move the slider, but Step 1 & 2 use cache, so it's instant.
257
+ final_image = processed_image.copy()
258
  if rotate_angle != 0:
259
+ final_image = final_image.rotate(rotate_angle, expand=True)
260
 
261
  # --- Display ---
262
  col1, col2 = st.columns(2)
 
267
 
268
  with col2:
269
  st.subheader("Result")
270
+ st.image(final_image, use_container_width=True)
271
+ st.caption(f"Size: {final_image.size}")
272
 
273
  # --- Download ---
274
  st.markdown("---")
275
  st.download_button(
276
  label="💾 Download Result (PNG)",
277
+ data=convert_image_to_bytes(final_image),
278
  file_name="processed_image.png",
279
  mime="image/png"
280
  )