lukeafullard commited on
Commit
8de6538
·
verified ·
1 Parent(s): 1c883f5

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +85 -118
src/streamlit_app.py CHANGED
@@ -5,13 +5,12 @@ from torchvision import transforms
5
  from transformers import AutoModelForImageSegmentation, AutoImageProcessor, Swin2SRForImageSuperResolution
6
  import io
7
  import numpy as np
8
- import gc # Garbage collection for memory safety
9
 
10
  # Page Configuration
11
  st.set_page_config(layout="wide", page_title="AI Image Lab")
12
 
13
- # --- 1. MODEL LOADING (Cached Resource) ---
14
- # Models are loaded once and stay in memory.
15
 
16
  @st.cache_resource
17
  def load_rembg_model():
@@ -30,18 +29,14 @@ def load_upscaler(scale=2):
30
  model = Swin2SRForImageSuperResolution.from_pretrained(model_id)
31
  return processor, model
32
 
33
- # --- 2. HELPER FUNCTIONS ---
34
 
35
  def find_mask_tensor(output):
36
- """Recursively finds the mask tensor in complex model outputs."""
37
  if isinstance(output, torch.Tensor):
38
- if output.dim() == 4 and output.shape[1] == 1:
39
- return output
40
- elif output.dim() == 3 and output.shape[0] == 1:
41
- return output
42
  return None
43
- if hasattr(output, "logits"):
44
- return find_mask_tensor(output.logits)
45
  elif isinstance(output, (list, tuple)):
46
  for item in output:
47
  found = find_mask_tensor(item)
@@ -49,49 +44,30 @@ def find_mask_tensor(output):
49
  return None
50
 
51
  def run_swin_inference(image, processor, model):
52
- """Atomic inference for a single chunk."""
53
  inputs = processor(image, return_tensors="pt")
54
  with torch.no_grad():
55
  outputs = model(**inputs)
56
-
57
  output = outputs.reconstruction.data.squeeze().float().cpu().clamp_(0, 1).numpy()
58
  output = np.moveaxis(output, 0, -1)
59
  output = (output * 255.0).round().astype(np.uint8)
60
  return Image.fromarray(output)
61
 
62
  def upscale_chunk_logic(image, processor, model):
63
- """Handles RGBA vs RGB logic for a single chunk."""
64
  if image.mode == 'RGBA':
65
  r, g, b, a = image.split()
66
  rgb_image = Image.merge('RGB', (r, g, b))
67
  upscaled_rgb = run_swin_inference(rgb_image, processor, model)
68
- # Resize alpha to match new RGB size
69
  upscaled_a = a.resize(upscaled_rgb.size, Image.Resampling.LANCZOS)
70
  return Image.merge('RGBA', (*upscaled_rgb.split(), upscaled_a))
71
  else:
72
  return run_swin_inference(image, processor, model)
73
 
74
- def convert_image_to_bytes(img):
75
- buf = io.BytesIO()
76
- img.save(buf, format="PNG")
77
- return buf.getvalue()
78
-
79
- # --- 3. HEAVY OPERATIONS (Cached Data) ---
80
- # These functions cache their results. If inputs (image/settings) don't change,
81
- # they return the previous result instantly without using RAM/CPU.
82
-
83
  @st.cache_data(show_spinner=False)
84
  def process_background_removal(image_bytes):
85
- """
86
- Removes background. Input is bytes to make it hashable for caching.
87
- """
88
- # Re-open image from bytes
89
  image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
90
-
91
- # Load model
92
  model, device = load_rembg_model()
93
 
94
- # Preprocessing
95
  w, h = image.size
96
  transform_image = transforms.Compose([
97
  transforms.Resize((1024, 1024)),
@@ -100,15 +76,11 @@ def process_background_removal(image_bytes):
100
  ])
101
  input_images = transform_image(image).unsqueeze(0).to(device)
102
 
103
- # Inference
104
  with torch.no_grad():
105
  outputs = model(input_images)
106
 
107
- # Find Mask
108
  result_tensor = find_mask_tensor(outputs)
109
- if result_tensor is None:
110
- result_tensor = outputs[0] if isinstance(outputs, (list, tuple)) else outputs
111
-
112
  if not isinstance(result_tensor, torch.Tensor):
113
  if isinstance(result_tensor, (list, tuple)): result_tensor = result_tensor[0]
114
 
@@ -118,82 +90,90 @@ def process_background_removal(image_bytes):
118
  pred_pil = transforms.ToPILImage()(pred)
119
  mask = pred_pil.resize((w, h))
120
  image.putalpha(mask)
121
-
122
  return image
123
 
124
- def process_tiled_upscale(image, scale_factor, grid_n, progress_bar=None):
125
  """
126
- Splits image into n*n tiles, upscales each, and merges.
127
- This function is NOT cached directly because it uses a progress bar (UI element).
128
- We wrap the logic inside the main loop or a separate cached function if needed.
129
  """
130
- # Load Model
131
  processor, model = load_upscaler(scale_factor)
132
-
133
  w, h = image.size
134
- rows = grid_n
135
- cols = grid_n
136
 
137
- # Calculate tile sizes
138
  tile_w = w // cols
139
  tile_h = h // rows
140
 
141
- # Create large canvas
 
 
142
  full_image = Image.new(image.mode, (w * scale_factor, h * scale_factor))
143
  total_tiles = rows * cols
144
  count = 0
145
 
146
  for y in range(rows):
147
  for x in range(cols):
148
- # 1. Crop
149
- left = x * tile_w
150
- upper = y * tile_h
151
- # Handle edge pixels (ensure last tile takes remainder)
152
- right = w if x == cols - 1 else (x + 1) * tile_w
153
- lower = h if y == rows - 1 else (y + 1) * tile_h
 
 
 
 
 
 
 
 
 
 
154
 
155
- tile = image.crop((left, upper, right, lower))
 
156
 
157
- # 2. Upscale
158
  upscaled_tile = upscale_chunk_logic(tile, processor, model)
159
 
160
- # 3. Paste
161
- paste_x = left * scale_factor
162
- paste_y = upper * scale_factor
163
- full_image.paste(upscaled_tile, (paste_x, paste_y))
164
 
165
- # 4. Memory Cleanup (Crucial for 16Gi limit)
166
- del tile
167
- del upscaled_tile
168
- gc.collect()
169
- if torch.cuda.is_available():
170
- torch.cuda.empty_cache()
 
 
 
 
 
 
 
 
 
 
 
171
 
172
- # 5. Update UI
173
  count += 1
174
- if progress_bar:
175
- progress_bar.progress(count / total_tiles, text=f"Processing Tile {count}/{total_tiles}...")
176
 
177
  return full_image
178
 
179
- # Wrapper for caching the upscale result (without progress bar args)
180
- @st.cache_data(show_spinner=False)
181
- def cached_upscale_wrapper(image_bytes, scale_factor, grid_n):
182
- """
183
- This wrapper allows us to cache the upscale result.
184
- We convert PIL->Bytes->PIL inside to ensure Streamlit can hash the input.
185
- """
186
- image = Image.open(io.BytesIO(image_bytes))
187
- # We cannot pass the progress bar to a cached function,
188
- # so we run it without the bar or handle the bar outside.
189
- # For caching purposes, we run it 'quietly'.
190
- return process_tiled_upscale(image, scale_factor, grid_n, progress_bar=None)
191
 
192
- # --- 4. MAIN APP ---
193
 
194
  def main():
195
- st.title("✨ AI Image Lab: Memory Safe")
196
- st.markdown("Features: **RMBG-1.4** | **Swin2SR (Tiled)** | **Smart Caching**")
197
 
198
  # --- Sidebar ---
199
  st.sidebar.header("1. Background")
@@ -202,16 +182,8 @@ def main():
202
  st.sidebar.header("2. AI Upscaling")
203
  upscale_mode = st.sidebar.radio("Magnification", ["None", "2x", "4x"])
204
 
205
- # Grid Slider for Memory Safety
206
  if upscale_mode != "None":
207
- grid_n = st.sidebar.slider(
208
- "Grid Split (Memory Saver)",
209
- min_value=2,
210
- max_value=8,
211
- value=4,
212
- help="Higher = Less RAM used, but slightly slower. If crashing, increase this!"
213
- )
214
- st.sidebar.info(f"Splitting image into {grid_n*grid_n} pieces.")
215
  else:
216
  grid_n = 2
217
 
@@ -222,38 +194,34 @@ def main():
222
  uploaded_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg", "webp"])
223
 
224
  if uploaded_file is not None:
225
- # Load Original
226
- file_bytes = uploaded_file.getvalue() # Keep raw bytes for caching references
227
- image = Image.open(io.BytesIO(file_bytes)).convert("RGB")
228
 
229
- # --- PIPELINE START ---
230
-
231
- # Step 1: Background Removal (Cached)
232
  if remove_bg:
233
- with st.spinner("Removing background..."):
234
- # We pass bytes to the cached function
235
- processed_image = process_background_removal(file_bytes)
236
  else:
237
- processed_image = image
238
 
239
- # Step 2: Upscaling (Cached manually or via wrapper)
240
  if upscale_mode != "None":
241
  scale = 4 if "4x" in upscale_mode else 2
242
 
243
- # Convert current stage to bytes for cache key
244
- current_stage_bytes = convert_image_to_bytes(processed_image)
245
 
246
- # Check if we should use the cached wrapper or run with progress bar
247
- # To preserve the "Progress Bar" experience while still caching, we can:
248
- # Check if it's already in cache? Streamlit doesn't expose `is_cached`.
249
- # We will use the cached wrapper. The downside: the first run won't show the detailed tile progress
250
- # inside the cached function, just the spinner.
251
 
252
- with st.spinner(f"Upscaling x{scale} ({grid_n*grid_n} tiles)..."):
253
- processed_image = cached_upscale_wrapper(current_stage_bytes, scale, grid_n)
254
-
255
- # Step 3: Geometry (Fast - No Caching needed, applied on top)
256
- # This runs every time you move the slider, but Step 1 & 2 use cache, so it's instant.
 
 
 
 
 
257
  final_image = processed_image.copy()
258
  if rotate_angle != 0:
259
  final_image = final_image.rotate(rotate_angle, expand=True)
@@ -262,15 +230,14 @@ def main():
262
  col1, col2 = st.columns(2)
263
  with col1:
264
  st.subheader("Original")
265
- st.image(image, use_container_width=True)
266
- st.caption(f"Size: {image.size}")
267
 
268
  with col2:
269
  st.subheader("Result")
270
  st.image(final_image, use_container_width=True)
271
  st.caption(f"Size: {final_image.size}")
272
 
273
- # --- Download ---
274
  st.markdown("---")
275
  st.download_button(
276
  label="💾 Download Result (PNG)",
 
5
  from transformers import AutoModelForImageSegmentation, AutoImageProcessor, Swin2SRForImageSuperResolution
6
  import io
7
  import numpy as np
8
+ import gc
9
 
10
  # Page Configuration
11
  st.set_page_config(layout="wide", page_title="AI Image Lab")
12
 
13
+ # --- 1. MODEL LOADING ---
 
14
 
15
  @st.cache_resource
16
  def load_rembg_model():
 
29
  model = Swin2SRForImageSuperResolution.from_pretrained(model_id)
30
  return processor, model
31
 
32
+ # --- 2. PROCESSING LOGIC ---
33
 
34
  def find_mask_tensor(output):
 
35
  if isinstance(output, torch.Tensor):
36
+ if output.dim() == 4 and output.shape[1] == 1: return output
37
+ elif output.dim() == 3 and output.shape[0] == 1: return output
 
 
38
  return None
39
+ if hasattr(output, "logits"): return find_mask_tensor(output.logits)
 
40
  elif isinstance(output, (list, tuple)):
41
  for item in output:
42
  found = find_mask_tensor(item)
 
44
  return None
45
 
46
def run_swin_inference(image, processor, model):
    """Run one Swin2SR super-resolution pass and return the result as a PIL image.

    The model's reconstruction tensor (C, H, W, floats in [0, 1] after clamping)
    is converted to an H x W x C uint8 array before wrapping in a PIL Image.
    """
    model_inputs = processor(image, return_tensors="pt")
    with torch.no_grad():
        result = model(**model_inputs)
    # Tensor -> numpy: drop batch dim, clamp to valid range, move channels last.
    arr = result.reconstruction.data.squeeze().float().cpu().clamp_(0, 1).numpy()
    arr = np.moveaxis(arr, 0, -1)
    arr = (arr * 255.0).round().astype(np.uint8)
    return Image.fromarray(arr)
54
 
55
def upscale_chunk_logic(image, processor, model):
    """Upscale a single tile, preserving an alpha channel when present.

    Swin2SR operates on RGB data only, so for RGBA input the alpha band is
    split off, the RGB bands are super-resolved, and the alpha band is simply
    resized (LANCZOS) to the new dimensions and re-attached.
    """
    if image.mode != 'RGBA':
        return run_swin_inference(image, processor, model)
    red, green, blue, alpha = image.split()
    upscaled = run_swin_inference(Image.merge('RGB', (red, green, blue)), processor, model)
    alpha_resized = alpha.resize(upscaled.size, Image.Resampling.LANCZOS)
    return Image.merge('RGBA', (*upscaled.split(), alpha_resized))
64
 
 
 
 
 
 
 
 
 
 
65
  @st.cache_data(show_spinner=False)
66
  def process_background_removal(image_bytes):
67
+ """Cached background removal."""
 
 
 
68
  image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
 
 
69
  model, device = load_rembg_model()
70
 
 
71
  w, h = image.size
72
  transform_image = transforms.Compose([
73
  transforms.Resize((1024, 1024)),
 
76
  ])
77
  input_images = transform_image(image).unsqueeze(0).to(device)
78
 
 
79
  with torch.no_grad():
80
  outputs = model(input_images)
81
 
 
82
  result_tensor = find_mask_tensor(outputs)
83
+ if result_tensor is None: result_tensor = outputs[0] if isinstance(outputs, (list, tuple)) else outputs
 
 
84
  if not isinstance(result_tensor, torch.Tensor):
85
  if isinstance(result_tensor, (list, tuple)): result_tensor = result_tensor[0]
86
 
 
90
  pred_pil = transforms.ToPILImage()(pred)
91
  mask = pred_pil.resize((w, h))
92
  image.putalpha(mask)
 
93
  return image
94
 
95
def process_tiled_upscale(image, scale_factor, grid_n, progress_bar=None):
    """
    Tiled upscaling with OVERLAP to prevent edge artifacts.

    The image is split into grid_n x grid_n tiles. Each tile is cropped with
    an extra `overlap`-pixel border (clamped to the image bounds) so the model
    sees surrounding context, upscaled, and then the overlap border is cropped
    away before pasting — eliminating visible seams between tiles.

    Args:
        image: Source PIL image (RGB or RGBA).
        scale_factor: Upscale multiplier; must match the loaded Swin2SR model.
        grid_n: Tiles per axis; higher values lower peak RAM usage.
        progress_bar: Optional Streamlit progress bar, updated per tile.
            Defaults to None so the function is safe to call headlessly.

    Returns:
        A new PIL image of size (w * scale_factor, h * scale_factor).
    """
    processor, model = load_upscaler(scale_factor)
    w, h = image.size
    rows = cols = grid_n

    # Base tile size (without overlap).
    tile_w = w // cols
    tile_h = h // rows

    # Overlap buffer (pixels) - lets the AI see context around each tile.
    overlap = 32

    full_image = Image.new(image.mode, (w * scale_factor, h * scale_factor))
    total_tiles = rows * cols
    count = 0

    for y in range(rows):
        for x in range(cols):
            # 1. Define the "Target" area (where this tile goes in the original).
            target_left = x * tile_w
            target_upper = y * tile_h
            # The last column/row absorbs any remainder pixels.
            target_right = w if x == cols - 1 else (x + 1) * tile_w
            target_lower = h if y == rows - 1 else (y + 1) * tile_h

            target_w = target_right - target_left
            target_h = target_lower - target_upper

            # 2. Define the "Source" area (Target + Overlap), clamped to bounds.
            source_left = max(0, target_left - overlap)
            source_upper = max(0, target_upper - overlap)
            source_right = min(w, target_right + overlap)
            source_lower = min(h, target_lower + overlap)

            # Crop the padded tile.
            tile = image.crop((source_left, source_upper, source_right, source_lower))

            # 3. Upscale the padded tile.
            upscaled_tile = upscale_chunk_logic(tile, processor, model)

            # 4. Crop the "valid" center from the upscaled tile.
            # Extra pixels taken on the left/top, in original-scale units.
            extra_left = target_left - source_left
            extra_upper = target_upper - source_upper

            # Convert offsets and sizes to the upscaled coordinate space.
            crop_x = extra_left * scale_factor
            crop_y = extra_upper * scale_factor
            crop_w = target_w * scale_factor
            crop_h = target_h * scale_factor

            # Remove the overlap borders so tiles butt together seamlessly.
            clean_tile = upscaled_tile.crop((crop_x, crop_y, crop_x + crop_w, crop_y + crop_h))

            # 5. Paste the clean tile into the output canvas.
            paste_x = target_left * scale_factor
            paste_y = target_upper * scale_factor
            full_image.paste(clean_tile, (paste_x, paste_y))

            # Memory cleanup between tiles (crucial under tight RAM limits);
            # also release cached GPU memory when running on CUDA.
            del tile, upscaled_tile, clean_tile
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            count += 1
            if progress_bar is not None:
                progress_bar.progress(count / total_tiles, text=f"Upscaling Tile {count}/{total_tiles} (with overlap)...")

    return full_image
166
 
167
def convert_image_to_bytes(img):
    """Serialize a PIL image to PNG-encoded bytes (e.g. for download buttons)."""
    with io.BytesIO() as buffer:
        img.save(buffer, format="PNG")
        return buffer.getvalue()
 
 
 
 
 
 
 
 
171
 
172
+ # --- 3. MAIN APP ---
173
 
174
  def main():
175
+ st.title("✨ AI Image Lab: Seamless Edition")
176
+ st.markdown("Features: **RMBG-1.4** | **Swin2SR (Seamless Tiling)** | **Progress Bar**")
177
 
178
  # --- Sidebar ---
179
  st.sidebar.header("1. Background")
 
182
  st.sidebar.header("2. AI Upscaling")
183
  upscale_mode = st.sidebar.radio("Magnification", ["None", "2x", "4x"])
184
 
 
185
  if upscale_mode != "None":
186
+ grid_n = st.sidebar.slider("Grid Split", 2, 8, 4, help="Higher = Safer RAM usage")
 
 
 
 
 
 
 
187
  else:
188
  grid_n = 2
189
 
 
194
  uploaded_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg", "webp"])
195
 
196
  if uploaded_file is not None:
197
+ file_bytes = uploaded_file.getvalue()
 
 
198
 
199
+ # 1. Background Removal
 
 
200
  if remove_bg:
201
+ processed_image = process_background_removal(file_bytes)
 
 
202
  else:
203
+ processed_image = Image.open(io.BytesIO(file_bytes)).convert("RGB")
204
 
205
+ # 2. Upscaling (Manual Caching with Session State)
206
  if upscale_mode != "None":
207
  scale = 4 if "4x" in upscale_mode else 2
208
 
209
+ # Cache Key
210
+ cache_key = f"{uploaded_file.name}_{remove_bg}_{scale}_{grid_n}_overlap"
211
 
212
+ if "upscale_cache" not in st.session_state:
213
+ st.session_state.upscale_cache = {}
 
 
 
214
 
215
+ if cache_key in st.session_state.upscale_cache:
216
+ processed_image = st.session_state.upscale_cache[cache_key]
217
+ st.info("✅ Loaded upscaled image from cache (Instant!)")
218
+ else:
219
+ progress_bar = st.progress(0, text="Initializing AI models...")
220
+ processed_image = process_tiled_upscale(processed_image, scale, grid_n, progress_bar)
221
+ progress_bar.empty()
222
+ st.session_state.upscale_cache[cache_key] = processed_image
223
+
224
+ # 3. Geometry
225
  final_image = processed_image.copy()
226
  if rotate_angle != 0:
227
  final_image = final_image.rotate(rotate_angle, expand=True)
 
230
  col1, col2 = st.columns(2)
231
  with col1:
232
  st.subheader("Original")
233
+ st.image(Image.open(io.BytesIO(file_bytes)), use_container_width=True)
234
+ st.caption(f"Size: {Image.open(io.BytesIO(file_bytes)).size}")
235
 
236
  with col2:
237
  st.subheader("Result")
238
  st.image(final_image, use_container_width=True)
239
  st.caption(f"Size: {final_image.size}")
240
 
 
241
  st.markdown("---")
242
  st.download_button(
243
  label="💾 Download Result (PNG)",