lukeafullard committed on
Commit
02623e7
·
verified ·
1 Parent(s): f753191

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +176 -72
src/streamlit_app.py CHANGED
@@ -1,8 +1,9 @@
1
  import streamlit as st
2
  from PIL import Image
3
  import torch
 
4
  from torchvision import transforms
5
- from transformers import AutoModelForImageSegmentation, AutoImageProcessor, Swin2SRForImageSuperResolution
6
  import io
7
  import numpy as np
8
  import gc
@@ -10,33 +11,49 @@ import gc
10
  # Page Configuration
11
  st.set_page_config(layout="wide", page_title="AI Image Lab")
12
 
13
- # --- 1. MODEL LOADING ---
14
 
15
  @st.cache_resource
16
- def load_rembg_model():
17
- # RMBG-1.4 (Fast & High Quality)
18
  model = AutoModelForImageSegmentation.from_pretrained("briaai/RMBG-1.4", trust_remote_code=True)
19
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
20
  model.to(device)
21
  return model, device
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  @st.cache_resource
24
  def load_upscaler(scale=2):
25
  if scale == 4:
26
- # FIXED: Use the 'RealWorld' model for 4x. It exists and handles artifacts better.
27
  model_id = "caidas/swin2SR-realworld-sr-x4-64-bsrgan-psnr"
28
  else:
29
- # 2x Classical Model
30
  model_id = "caidas/swin2SR-classical-sr-x2-64"
31
-
32
  processor = AutoImageProcessor.from_pretrained(model_id)
33
  model = Swin2SRForImageSuperResolution.from_pretrained(model_id)
34
  return processor, model
35
 
36
- # --- 2. PROCESSING LOGIC ---
37
 
38
  def find_mask_tensor(output):
39
- """Recursively finds the mask tensor."""
40
  if isinstance(output, torch.Tensor):
41
  if output.dim() == 4 and output.shape[1] == 1: return output
42
  elif output.dim() == 3 and output.shape[0] == 1: return output
@@ -48,112 +65,188 @@ def find_mask_tensor(output):
48
  if found is not None: return found
49
  return None
50
 
51
- def run_swin_inference(image, processor, model):
52
- inputs = processor(image, return_tensors="pt")
53
- with torch.no_grad():
54
- outputs = model(**inputs)
55
- output = outputs.reconstruction.data.squeeze().float().cpu().clamp_(0, 1).numpy()
56
- output = np.moveaxis(output, 0, -1)
57
- output = (output * 255.0).round().astype(np.uint8)
58
- return Image.fromarray(output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
- def upscale_chunk_logic(image, processor, model):
61
- if image.mode == 'RGBA':
62
- r, g, b, a = image.split()
63
- rgb_image = Image.merge('RGB', (r, g, b))
64
- upscaled_rgb = run_swin_inference(rgb_image, processor, model)
65
- upscaled_a = a.resize(upscaled_rgb.size, Image.Resampling.LANCZOS)
66
- return Image.merge('RGBA', (*upscaled_rgb.split(), upscaled_a))
67
- else:
68
- return run_swin_inference(image, processor, model)
69
 
70
- @st.cache_data(show_spinner=False)
71
- def process_background_removal(image_bytes):
72
- """Cached background removal (RMBG-1.4)."""
73
- image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
74
- model, device = load_rembg_model()
75
-
76
  w, h = image.size
77
- transform_image = transforms.Compose([
78
- transforms.Resize((1024, 1024)),
 
79
  transforms.ToTensor(),
80
  transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
81
  ])
82
- input_images = transform_image(image).unsqueeze(0).to(device)
83
 
84
  with torch.no_grad():
85
- outputs = model(input_images)
86
 
87
  result_tensor = find_mask_tensor(outputs)
88
  if result_tensor is None: result_tensor = outputs[0] if isinstance(outputs, (list, tuple)) else outputs
89
  if not isinstance(result_tensor, torch.Tensor):
90
  if isinstance(result_tensor, (list, tuple)): result_tensor = result_tensor[0]
91
 
 
92
  pred = result_tensor.squeeze().cpu()
93
  if pred.max() > 1 or pred.min() < 0: pred = pred.sigmoid()
94
-
 
95
  pred_pil = transforms.ToPILImage()(pred)
96
- mask = pred_pil.resize((w, h))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  image.putalpha(mask)
98
  return image
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  def process_tiled_upscale(image, scale_factor, grid_n, progress_bar):
101
- """
102
- Tiled upscaling with OVERLAP to prevent seams.
103
- """
104
  processor, model = load_upscaler(scale_factor)
105
  w, h = image.size
106
  rows = cols = grid_n
107
-
108
  tile_w = w // cols
109
  tile_h = h // rows
110
-
111
- # Overlap buffer (pixels)
112
  overlap = 32
113
-
114
  full_image = Image.new(image.mode, (w * scale_factor, h * scale_factor))
115
  total_tiles = rows * cols
116
  count = 0
117
-
118
  for y in range(rows):
119
  for x in range(cols):
120
- # Target Area
121
  target_left = x * tile_w
122
  target_upper = y * tile_h
123
  target_right = w if x == cols - 1 else (x + 1) * tile_w
124
  target_lower = h if y == rows - 1 else (y + 1) * tile_h
125
- target_w = target_right - target_left
126
- target_h = target_lower - target_upper
127
-
128
- # Source Area (with overlap)
129
  source_left = max(0, target_left - overlap)
130
  source_upper = max(0, target_upper - overlap)
131
  source_right = min(w, target_right + overlap)
132
  source_lower = min(h, target_lower + overlap)
133
-
134
  tile = image.crop((source_left, source_upper, source_right, source_lower))
135
  upscaled_tile = upscale_chunk_logic(tile, processor, model)
136
-
137
- # Calculate offsets for cropping the valid center
138
  extra_left = target_left - source_left
139
  extra_upper = target_upper - source_upper
140
-
141
  crop_x = extra_left * scale_factor
142
  crop_y = extra_upper * scale_factor
143
  crop_w = target_w * scale_factor
144
  crop_h = target_h * scale_factor
145
-
146
  clean_tile = upscaled_tile.crop((crop_x, crop_y, crop_x + crop_w, crop_y + crop_h))
147
-
148
  paste_x = target_left * scale_factor
149
  paste_y = target_upper * scale_factor
150
  full_image.paste(clean_tile, (paste_x, paste_y))
151
-
152
  del tile, upscaled_tile, clean_tile
153
  gc.collect()
154
  count += 1
155
- progress_bar.progress(count / total_tiles, text=f"Upscaling Tile {count}/{total_tiles} (with overlap)...")
156
-
157
  return full_image
158
 
159
  def convert_image_to_bytes(img):
@@ -161,16 +254,27 @@ def convert_image_to_bytes(img):
161
  img.save(buf, format="PNG")
162
  return buf.getvalue()
163
 
164
- # --- 3. MAIN APP ---
165
 
166
  def main():
167
- st.title("✨ AI Image Lab: Final Edition")
168
- st.markdown("Features: **RMBG-1.4** | **Swin2SR (Seamless Tiling)** | **Progress Bar**")
169
 
170
  # --- Sidebar ---
171
- st.sidebar.header("1. Background")
172
  remove_bg = st.sidebar.checkbox("Remove Background", value=False)
173
 
 
 
 
 
 
 
 
 
 
 
 
174
  st.sidebar.header("2. AI Upscaling")
175
  upscale_mode = st.sidebar.radio("Magnification", ["None", "2x", "4x"])
176
 
@@ -188,25 +292,27 @@ def main():
188
  if uploaded_file is not None:
189
  file_bytes = uploaded_file.getvalue()
190
 
191
- # 1. Background Removal
192
  if remove_bg:
193
- processed_image = process_background_removal(file_bytes)
 
 
194
  else:
195
  processed_image = Image.open(io.BytesIO(file_bytes)).convert("RGB")
196
 
197
- # 2. Upscaling (Manual Caching with Session State)
198
  if upscale_mode != "None":
199
  scale = 4 if "4x" in upscale_mode else 2
200
 
201
- # Cache Key
202
- cache_key = f"{uploaded_file.name}_{remove_bg}_{scale}_{grid_n}_overlap_v4"
203
 
204
  if "upscale_cache" not in st.session_state:
205
  st.session_state.upscale_cache = {}
206
 
207
  if cache_key in st.session_state.upscale_cache:
208
  processed_image = st.session_state.upscale_cache[cache_key]
209
- st.info("✅ Loaded upscaled image from cache (Instant!)")
210
  else:
211
  progress_bar = st.progress(0, text="Initializing AI models...")
212
  processed_image = process_tiled_upscale(processed_image, scale, grid_n, progress_bar)
@@ -223,12 +329,10 @@ def main():
223
  with col1:
224
  st.subheader("Original")
225
  st.image(Image.open(io.BytesIO(file_bytes)), use_container_width=True)
226
- st.caption(f"Size: {Image.open(io.BytesIO(file_bytes)).size}")
227
 
228
  with col2:
229
  st.subheader("Result")
230
  st.image(final_image, use_container_width=True)
231
- st.caption(f"Size: {final_image.size}")
232
 
233
  st.markdown("---")
234
  st.download_button(
 
1
  import streamlit as st
2
  from PIL import Image
3
  import torch
4
+ import torch.nn.functional as F
5
  from torchvision import transforms
6
+ from transformers import AutoModelForImageSegmentation, AutoImageProcessor, Swin2SRForImageSuperResolution, VitMatteForImageMatting
7
  import io
8
  import numpy as np
9
  import gc
 
11
  # Page Configuration
12
  st.set_page_config(layout="wide", page_title="AI Image Lab")
13
 
14
+ # --- 1. MODEL LOADING (Cached) ---
15
 
16
  @st.cache_resource
17
+ def load_rmbg_model():
18
+ """Option 1: The Lightweight Specialist"""
19
  model = AutoModelForImageSegmentation.from_pretrained("briaai/RMBG-1.4", trust_remote_code=True)
20
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21
  model.to(device)
22
  return model, device
23
 
24
+ @st.cache_resource
25
+ def load_birefnet_model():
26
+ """Option 2: The Heavyweight Generalist"""
27
+ # This requires 'timm' installed
28
+ model = AutoModelForImageSegmentation.from_pretrained("ZhengPeng7/BiRefNet", trust_remote_code=True)
29
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
30
+ model.to(device)
31
+ return model, device
32
+
33
+ @st.cache_resource
34
+ def load_vitmatte_model():
35
+ """Option 3: The Refiner (Matting)"""
36
+ # VitMatte requires a rough mask first (we use RMBG for that)
37
+ processor = AutoImageProcessor.from_pretrained("hustvl/vitmatte-small-composition-1k")
38
+ model = VitMatteForImageMatting.from_pretrained("hustvl/vitmatte-small-composition-1k")
39
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
40
+ model.to(device)
41
+ return processor, model, device
42
+
43
  @st.cache_resource
44
  def load_upscaler(scale=2):
45
  if scale == 4:
 
46
  model_id = "caidas/swin2SR-realworld-sr-x4-64-bsrgan-psnr"
47
  else:
 
48
  model_id = "caidas/swin2SR-classical-sr-x2-64"
 
49
  processor = AutoImageProcessor.from_pretrained(model_id)
50
  model = Swin2SRForImageSuperResolution.from_pretrained(model_id)
51
  return processor, model
52
 
53
+ # --- 2. HELPER FUNCTIONS ---
54
 
55
  def find_mask_tensor(output):
56
+ """Recursively finds the mask tensor in complex model outputs."""
57
  if isinstance(output, torch.Tensor):
58
  if output.dim() == 4 and output.shape[1] == 1: return output
59
  elif output.dim() == 3 and output.shape[0] == 1: return output
 
65
  if found is not None: return found
66
  return None
67
 
68
+ def generate_trimap(mask_tensor, erode_kernel_size=10, dilate_kernel_size=10):
69
+ """
70
+ Generates a trimap (Foreground, Background, Unknown) from a binary mask
71
+ using Pure PyTorch (No OpenCV required).
72
+ Values: 1=FG, 0=BG, 0.5=Unknown (Edge)
73
+ """
74
+ # Ensure mask is Bx1xHxW
75
+ if mask_tensor.dim() == 3: mask_tensor = mask_tensor.unsqueeze(0)
76
+
77
+ # Create kernels
78
+ erode_k = erode_kernel_size
79
+ dilate_k = dilate_kernel_size
80
+
81
+ # Dilation (Max Pooling) - Expands the white area
82
+ # We pad to keep size same
83
+ dilated = F.max_pool2d(mask_tensor, kernel_size=dilate_k, stride=1, padding=dilate_k//2)
84
+
85
+ # Erosion (Negative Max Pooling) - Shrinks the white area
86
+ eroded = -F.max_pool2d(-mask_tensor, kernel_size=erode_k, stride=1, padding=erode_k//2)
87
+
88
+ # Trimap construction
89
+ # Pixels that are 1 in eroded are definitely FG (1.0)
90
+ # Pixels that are 0 in dilated are definitely BG (0.0)
91
+ # Everything else is the "Unknown" zone (0.5)
92
+
93
+ # Start with Unknown (0.5)
94
+ trimap = torch.full_like(mask_tensor, 0.5)
95
+
96
+ # Set definites
97
+ trimap[eroded > 0.5] = 1.0
98
+ trimap[dilated < 0.5] = 0.0
99
+
100
+ return trimap
101
 
102
+ # --- 3. INFERENCE LOGIC ---
 
 
 
 
 
 
 
 
103
 
104
+ def inference_segmentation(model, image, device, resolution=1024):
105
+ """Generic inference for RMBG and BiRefNet."""
 
 
 
 
106
  w, h = image.size
107
+
108
+ transform = transforms.Compose([
109
+ transforms.Resize((resolution, resolution)),
110
  transforms.ToTensor(),
111
  transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
112
  ])
113
+ input_tensor = transform(image).unsqueeze(0).to(device)
114
 
115
  with torch.no_grad():
116
+ outputs = model(input_tensor)
117
 
118
  result_tensor = find_mask_tensor(outputs)
119
  if result_tensor is None: result_tensor = outputs[0] if isinstance(outputs, (list, tuple)) else outputs
120
  if not isinstance(result_tensor, torch.Tensor):
121
  if isinstance(result_tensor, (list, tuple)): result_tensor = result_tensor[0]
122
 
123
+ # Get binary-ish mask (logits or sigmoid)
124
  pred = result_tensor.squeeze().cpu()
125
  if pred.max() > 1 or pred.min() < 0: pred = pred.sigmoid()
126
+
127
+ # Resize back to original
128
  pred_pil = transforms.ToPILImage()(pred)
129
+ mask = pred_pil.resize((w, h), resample=Image.LANCZOS)
130
+ return mask
131
+
132
+ def inference_vitmatte(image, device):
133
+ """
134
+ Runs pipeline: RMBG (Rough Mask) -> Trimap -> VitMatte (Refined Mask)
135
+ """
136
+ # 1. Get Rough Mask using RMBG (Fast)
137
+ rmbg_model, _ = load_rmbg_model() # Re-use loaded model
138
+ rough_mask_pil = inference_segmentation(rmbg_model, image, device, resolution=1024)
139
+
140
+ # 2. Create Trimap
141
+ # Convert PIL mask to Tensor
142
+ mask_tensor = transforms.ToTensor()(rough_mask_pil).to(device)
143
+ # Generate trimap (1=FG, 0=BG, 0.5=Unknown)
144
+ trimap_tensor = generate_trimap(mask_tensor, erode_kernel_size=25, dilate_kernel_size=25)
145
+
146
+ # 3. VitMatte Inference
147
+ processor, model, _ = load_vitmatte_model()
148
+
149
+ # VitMatte expects inputs: pixel_values (image) and mask_labels (trimap)
150
+ inputs = processor(images=image, trimaps=trimap_tensor, return_tensors="pt").to(device)
151
+
152
+ with torch.no_grad():
153
+ outputs = model(**inputs)
154
+
155
+ # Output is the refined alphas
156
+ alphas = outputs.alphas
157
+
158
+ # 4. Post-process
159
+ # Extract alpha, resize to original
160
+ alpha_np = alphas.squeeze().cpu().numpy()
161
+ alpha_pil = Image.fromarray((alpha_np * 255).astype("uint8"), mode="L")
162
+ alpha_pil = alpha_pil.resize(image.size, resample=Image.LANCZOS)
163
+
164
+ return alpha_pil
165
+
166
+
167
+ @st.cache_data(show_spinner=False)
168
+ def process_background_removal(image_bytes, method="RMBG-1.4"):
169
+ image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
170
+
171
+ if method == "RMBG-1.4":
172
+ model, device = load_rmbg_model()
173
+ mask = inference_segmentation(model, image, device)
174
+
175
+ elif method == "BiRefNet (Heavy)":
176
+ model, device = load_birefnet_model()
177
+ mask = inference_segmentation(model, image, device, resolution=1024)
178
+
179
+ elif method == "VitMatte (Refiner)":
180
+ # VitMatte needs GPU ideally, works on CPU but slow
181
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
182
+ mask = inference_vitmatte(image, device)
183
+
184
+ else:
185
+ # Fallback
186
+ return image
187
+
188
+ # Apply mask
189
  image.putalpha(mask)
190
  return image
191
 
192
+ # --- Upscaling Logic (Same as before) ---
193
+ def run_swin_inference(image, processor, model):
194
+ inputs = processor(image, return_tensors="pt")
195
+ with torch.no_grad():
196
+ outputs = model(**inputs)
197
+ output = outputs.reconstruction.data.squeeze().float().cpu().clamp_(0, 1).numpy()
198
+ output = np.moveaxis(output, 0, -1)
199
+ output = (output * 255.0).round().astype(np.uint8)
200
+ return Image.fromarray(output)
201
+
202
+ def upscale_chunk_logic(image, processor, model):
203
+ if image.mode == 'RGBA':
204
+ r, g, b, a = image.split()
205
+ rgb_image = Image.merge('RGB', (r, g, b))
206
+ upscaled_rgb = run_swin_inference(rgb_image, processor, model)
207
+ upscaled_a = a.resize(upscaled_rgb.size, Image.Resampling.LANCZOS)
208
+ return Image.merge('RGBA', (*upscaled_rgb.split(), upscaled_a))
209
+ else:
210
+ return run_swin_inference(image, processor, model)
211
+
212
  def process_tiled_upscale(image, scale_factor, grid_n, progress_bar):
 
 
 
213
  processor, model = load_upscaler(scale_factor)
214
  w, h = image.size
215
  rows = cols = grid_n
 
216
  tile_w = w // cols
217
  tile_h = h // rows
 
 
218
  overlap = 32
 
219
  full_image = Image.new(image.mode, (w * scale_factor, h * scale_factor))
220
  total_tiles = rows * cols
221
  count = 0
 
222
  for y in range(rows):
223
  for x in range(cols):
 
224
  target_left = x * tile_w
225
  target_upper = y * tile_h
226
  target_right = w if x == cols - 1 else (x + 1) * tile_w
227
  target_lower = h if y == rows - 1 else (y + 1) * tile_h
 
 
 
 
228
  source_left = max(0, target_left - overlap)
229
  source_upper = max(0, target_upper - overlap)
230
  source_right = min(w, target_right + overlap)
231
  source_lower = min(h, target_lower + overlap)
 
232
  tile = image.crop((source_left, source_upper, source_right, source_lower))
233
  upscaled_tile = upscale_chunk_logic(tile, processor, model)
234
+ target_w = target_right - target_left
235
+ target_h = target_lower - target_upper
236
  extra_left = target_left - source_left
237
  extra_upper = target_upper - source_upper
 
238
  crop_x = extra_left * scale_factor
239
  crop_y = extra_upper * scale_factor
240
  crop_w = target_w * scale_factor
241
  crop_h = target_h * scale_factor
 
242
  clean_tile = upscaled_tile.crop((crop_x, crop_y, crop_x + crop_w, crop_y + crop_h))
 
243
  paste_x = target_left * scale_factor
244
  paste_y = target_upper * scale_factor
245
  full_image.paste(clean_tile, (paste_x, paste_y))
 
246
  del tile, upscaled_tile, clean_tile
247
  gc.collect()
248
  count += 1
249
+ progress_bar.progress(count / total_tiles, text=f"Upscaling Tile {count}/{total_tiles}...")
 
250
  return full_image
251
 
252
  def convert_image_to_bytes(img):
 
254
  img.save(buf, format="PNG")
255
  return buf.getvalue()
256
 
257
+ # --- 4. MAIN APP ---
258
 
259
  def main():
260
+ st.title("✨ AI Image Lab: Ultimate Edition")
261
+ st.markdown("Features: **Multi-Model Background** | **Swin2SR** | **Progress Bar**")
262
 
263
  # --- Sidebar ---
264
+ st.sidebar.header("1. Background Removal")
265
  remove_bg = st.sidebar.checkbox("Remove Background", value=False)
266
 
267
+ # NEW: Model Selector
268
+ if remove_bg:
269
+ bg_model = st.sidebar.selectbox(
270
+ "Select AI Model",
271
+ ["RMBG-1.4", "BiRefNet (Heavy)", "VitMatte (Refiner)"],
272
+ index=0,
273
+ help="RMBG: Fast, Standard Quality.\nBiRefNet: Slower, Better Edges.\nVitMatte: Slowest, Best for Hair/Transparency."
274
+ )
275
+ else:
276
+ bg_model = "None"
277
+
278
  st.sidebar.header("2. AI Upscaling")
279
  upscale_mode = st.sidebar.radio("Magnification", ["None", "2x", "4x"])
280
 
 
292
  if uploaded_file is not None:
293
  file_bytes = uploaded_file.getvalue()
294
 
295
+ # 1. Background
296
  if remove_bg:
297
+ # We add the model name to the spinner text so user knows what's happening
298
+ with st.spinner(f"Removing background using {bg_model}..."):
299
+ processed_image = process_background_removal(file_bytes, bg_model)
300
  else:
301
  processed_image = Image.open(io.BytesIO(file_bytes)).convert("RGB")
302
 
303
+ # 2. Upscaling
304
  if upscale_mode != "None":
305
  scale = 4 if "4x" in upscale_mode else 2
306
 
307
+ # Cache Key includes model name now
308
+ cache_key = f"{uploaded_file.name}_{bg_model}_{scale}_{grid_n}_v5"
309
 
310
  if "upscale_cache" not in st.session_state:
311
  st.session_state.upscale_cache = {}
312
 
313
  if cache_key in st.session_state.upscale_cache:
314
  processed_image = st.session_state.upscale_cache[cache_key]
315
+ st.info("✅ Loaded upscaled image from cache")
316
  else:
317
  progress_bar = st.progress(0, text="Initializing AI models...")
318
  processed_image = process_tiled_upscale(processed_image, scale, grid_n, progress_bar)
 
329
  with col1:
330
  st.subheader("Original")
331
  st.image(Image.open(io.BytesIO(file_bytes)), use_container_width=True)
 
332
 
333
  with col2:
334
  st.subheader("Result")
335
  st.image(final_image, use_container_width=True)
 
336
 
337
  st.markdown("---")
338
  st.download_button(