Spaces:

AkashKumarave
/

dp

Running

App Files Files Community

AkashKumarave committed on May 1, 2025

Commit

901bf30

verified ·

1 Parent(s): 9ca6126

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -157

app.py CHANGED Viewed

@@ -1,187 +1,115 @@
 import gradio as gr
 import torch
-from transformers import pipeline
-from PIL import Image
 import numpy as np
-import io
-import base64
-import sys
-# Configure error logging
-import logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-logger = logging.getLogger("BackgroundRemover")
-# Initialize the segmentation model for background removal
-segmenter = None
-def init_model():
-    global segmenter
-    try:
-        # Using RMBG-1.4 which is specifically designed for background removal
-        logger.info("Loading RMBG-1.4 model...")
-        segmenter = pipeline(
-            "image-segmentation",
-            model="briaai/RMBG-1.4",
-            device=0 if torch.cuda.is_available() else -1,
-            trust_remote_code=True  # Allow custom code execution for the model
-        )
-        logger.info("Successfully loaded RMBG-1.4 model")
-    except Exception as e:
-        logger.error(f"Error loading RMBG model: {e}")
-        # Fallback to a more standard segmentation model that doesn't require custom code
-        try:
-            logger.info("Attempting to load fallback model...")
-            segmenter = pipeline(
-                "image-segmentation",
-                model="facebook/detr-resnet-50-panoptic",
-                device=0 if torch.cuda.is_available() else -1
-            )
-            logger.info("Using fallback model: facebook/detr-resnet-50-panoptic")
-        except Exception as e2:
-            logger.error(f"Error loading fallback model: {e2}")
-            segmenter = None
-def remove_background(input_image):
-    """Remove background from an image using segmentation."""
-    global segmenter
-    # Initialize model if not already done
-    if segmenter is None:
-        init_model()
-    if segmenter is None:
-        logger.error("No segmentation model available")
-        return input_image
     if input_image is None:
-        logger.error("No input image provided")
         return None
     try:
-        # Convert input image to numpy array if it's not already
-        if isinstance(input_image, str):
-            input_img = Image.open(input_image)
-            input_array = np.array(input_img)
-        else:
-            input_array = np.array(input_image)
-        # Check if image is valid
-        if input_array.size == 0:
-            logger.error("Empty input image")
-            return input_image
-        logger.info(f"Processing image of shape {input_array.shape}")
-        # Run image segmentation
-        result = segmenter(input_image)
-        logger.info(f"Segmentation result type: {type(result)}")
-        # For the RMBG model, we directly get the mask
-        if isinstance(result, dict) and 'mask' in result:
-            # Direct mask from RMBG model
-            mask_array = np.array(result['mask'])
-            mask_array = mask_array / 255.0  # Normalize if needed
-            logger.info("Using RMBG mask")
-        elif isinstance(result, list) and len(result) > 0:
-            # Standard segmentation model output - try to create a foreground mask
-            foreground_classes = ['person', 'animal', 'vehicle', 'object']
-            # Initialize an empty mask
-            if len(input_array.shape) == 3:
-                mask_array = np.zeros((input_array.shape[0], input_array.shape[1]), dtype=np.float32)
-            else:
-                logger.error("Invalid input image shape")
-                return input_image
-            # Combine all foreground segments
-            for segment in result:
-                label = segment.get('label', '').lower()
-                # If it's a foreground class or we don't have specific classes to check
-                if any(fg_class in label for fg_class in foreground_classes) or not foreground_classes:
-                    segment_mask = segment.get('mask')
-                    if segment_mask is not None:
-                        # Resize mask if needed
-                        segment_mask = np.array(segment_mask)
-                        if segment_mask.shape[:2] != mask_array.shape:
-                            segment_mask = np.array(Image.fromarray(segment_mask).resize(
-                                (mask_array.shape[1], mask_array.shape[0])))
-                        # Add this segment to the foreground mask
-                        mask_array = np.maximum(mask_array, segment_mask)
-            logger.info("Created composite mask from segmentation model")
-        else:
-            logger.error("Unexpected model output format")
-            return input_image
-        # Create an RGBA image
-        if len(input_array.shape) == 3 and input_array.shape[2] >= 3:
-            rgba = np.zeros((input_array.shape[0], input_array.shape[1], 4), dtype=np.uint8)
-            rgba[:,:,:3] = input_array[:,:,:3]  # Copy RGB channels
-            # Apply mask to alpha channel
-            if 'briaai/RMBG' in str(segmenter.model):
-                # For RMBG model, use the mask directly
-                rgba[:,:,3] = (mask_array * 255).astype(np.uint8)
-            else:
-                # For other models, we may need to invert the mask
-                rgba[:,:,3] = (mask_array * 255).astype(np.uint8)
-            logger.info("Successfully created RGBA image")
-            return Image.fromarray(rgba)
-        else:
-            logger.error(f"Unexpected image format: shape {input_array.shape}")
-            return input_image
     except Exception as e:
-        logger.error(f"Error in background removal: {e}")
-        # Return original image if processing failed
-        return input_image
-# Initialize model on startup to avoid lazy loading during request
-init_model()
-# Create a simpler Gradio interface with minimal components to avoid internal errors
-with gr.Blocks(theme=gr.themes.Default(), css="footer {visibility: hidden}") as demo:
-    gr.Markdown(
-        """
-        # Space BG Erase Studio
-        Upload an image and the AI will remove its background, giving you a transparent PNG.
-        Powered by Hugging Face Transformers.
-        """
-    )
     with gr.Row():
-        with gr.Column():
-            input_image = gr.Image(type="pil", label="Upload Image")
-            submit_btn = gr.Button("Remove Background", variant="primary")
-        with gr.Column():
-            output_image = gr.Image(type="pil", label="Result (Transparent Background)")
-    # Simple click handler to avoid complex API handling
     submit_btn.click(
         fn=remove_background,
         inputs=input_image,
         outputs=output_image
     )
-    gr.Markdown(
-        """
-        ## How it works
-        This app uses a machine learning model specifically designed for background removal.
-        The result is a transparent PNG with only your subject visible.
-        ## Tips for best results
-        - Use images where the subject is clearly visible
-        - Good lighting helps the AI separate the subject from background
-        - The process may take a few seconds depending on image size
-        """
-    )
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 import torch
+import torch.nn.functional as F
 import numpy as np
+from PIL import Image
+import cv2
+import os
+# Import the network definition from the Space's local `models` package.
+# If that fails, raise an ImportError with an actionable message and keep the
+# original exception attached as the explicit cause for easier debugging.
+try:
+    from models.isnet import ISNetGT
+except ImportError as exc:
+    raise ImportError(
+        "Could not import ISNetGT from models.isnet. Ensure models/isnet.py is in the Space."
+    ) from exc
+# Define model loading function
+# Cache of already-loaded models: model_path -> (model, device).
+_MODEL_CACHE = {}
+def load_model(model_path="isnet-general-use.pth"):
+    """Load the ISNetGT weights from *model_path* and return ``(model, device)``.
+
+    The loaded pair is cached per path, so the checkpoint is read from disk
+    and moved to the device only once per process instead of on every call.
+
+    Args:
+        model_path: Path to the ``.pth`` state-dict file.
+
+    Returns:
+        A ``(model, device)`` tuple. The model is in eval mode on ``device``,
+        which is CUDA when available and CPU otherwise.
+
+    Raises:
+        FileNotFoundError: If *model_path* does not exist.
+    """
+    cached = _MODEL_CACHE.get(model_path)
+    if cached is not None:
+        return cached
+    if not os.path.exists(model_path):
+        raise FileNotFoundError(f"Model file {model_path} not found. Upload it to the Space root directory.")
+    model = ISNetGT()
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # NOTE(review): torch.load unpickles the file, so only load checkpoints you
+    # trust; consider weights_only=True if the installed torch supports it.
+    model.load_state_dict(torch.load(model_path, map_location=device))
+    model.to(device).eval()
+    _MODEL_CACHE[model_path] = (model, device)
+    return model, device
+# Image preprocessing function
+def preprocess_image(image, target_size=(1024, 1024)):
+    """Resize, pad and normalise an image into a batch tensor for the model.
+
+    The image is scaled to fit inside *target_size* while keeping its aspect
+    ratio, then placed in the top-left corner of a zero-filled canvas.
+
+    Args:
+        image: A PIL image or an array convertible with ``np.array``. Any
+            colour mode is accepted; it is reduced to three channels first.
+        target_size: ``(height, width)`` of the padded model input.
+
+    Returns:
+        A tuple ``(image_tensor, (new_h, new_w), (h, w))``: a float tensor of
+        shape ``(1, 3, target_h, target_w)`` with values in ``[0, 1]``, the
+        size of the resized image inside the padding, and the original size.
+    """
+    # RGBA, greyscale or palette inputs would break the RGB->BGR conversion
+    # and the 3-channel padding buffer below, so normalise the mode up front.
+    if hasattr(image, "convert"):
+        image = image.convert("RGB")
+    image = np.array(image)
+    if image.ndim == 2:
+        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
+    elif image.shape[2] == 4:
+        image = np.ascontiguousarray(image[..., :3])
+    # NOTE(review): channels are swapped to BGR and only scaled to [0, 1];
+    # confirm this matches the preprocessing the checkpoint was trained with.
+    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+    # Resize image while preserving aspect ratio
+    h, w = image.shape[:2]
+    scale = min(target_size[0] / h, target_size[1] / w)
+    # Clamp to at least one pixel: a very elongated image would otherwise
+    # truncate to a zero-sized side, which cv2.resize rejects.
+    new_h = min(target_size[0], max(1, int(h * scale)))
+    new_w = min(target_size[1], max(1, int(w * scale)))
+    image_resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LANCZOS4)
+    # Pad to target size (image in the top-left corner, zeros elsewhere)
+    padded_image = np.zeros((target_size[0], target_size[1], 3), dtype=np.uint8)
+    padded_image[:new_h, :new_w] = image_resized
+    # Normalize and convert to tensor (HWC uint8 -> 1xCxHxW float)
+    image_tensor = torch.from_numpy(padded_image).permute(2, 0, 1).float() / 255.0
+    image_tensor = image_tensor.unsqueeze(0)  # Add batch dimension
+    return image_tensor, (new_h, new_w), (h, w)
+# Inference function
+def inference(model, image_tensor, device):
+    """Run *model* on *image_tensor* and return a 2-D probability map.
+
+    Args:
+        model: Callable network; the first element of its output is used.
+            Presumably that element is a 4-D logits tensor; confirm against
+            the model definition.
+        image_tensor: Batched input tensor; its last two dimensions give the
+            size the prediction is resampled to.
+        device: Device the input is moved to before the forward pass.
+
+    Returns:
+        A NumPy array taken from batch index 0, channel 0 of the
+        sigmoid-activated, bilinearly resampled prediction.
+    """
+    batch = image_tensor.to(device)
+    spatial_size = batch.shape[2:]
+    with torch.no_grad():
+        prediction = model(batch)[0]  # Get segmentation output
+        prediction = F.interpolate(prediction, size=spatial_size, mode='bilinear', align_corners=True)
+        probabilities = torch.sigmoid(prediction)  # Convert to probability map
+        first_map = probabilities.cpu().numpy()[0, 0]
+    return first_map
+# Post-processing function
+def postprocess_output(output, original_size, resized_size):
+    """Convert the padded probability map into a binary mask at original size.
+
+    Args:
+        output: 2-D probability map covering the whole padded model input.
+        original_size: ``(height, width)`` of the image before resizing.
+        resized_size: ``(height, width)`` of the resized image inside the
+            padded input.
+
+    Returns:
+        A ``uint8`` array of shape *original_size* holding 255 where the
+        probability exceeds 0.5 and 0 elsewhere.
+    """
+    resized_h, resized_w = resized_size
+    original_h, original_w = original_size
+    # preprocess_image places the resized image in the top-left corner of the
+    # padded canvas, so the valid prediction is that corner only. Crop it out;
+    # rescaling the whole padded map would squeeze the padding into the mask
+    # and distort it for non-square images.
+    mask = np.ascontiguousarray(output[:resized_h, :resized_w])
+    # cv2.resize takes (width, height)
+    mask = cv2.resize(mask, (original_w, original_h), interpolation=cv2.INTER_LANCZOS4)
+    mask = (mask > 0.5).astype(np.uint8) * 255  # Binarize mask
+    return mask
+# Background removal function
+def remove_background(input_image):
+    """Return *input_image* as an RGBA image with the background transparent.
+
+    Args:
+        input_image: The uploaded image (a PIL image from the Gradio input),
+            or ``None`` when nothing was uploaded.
+
+    Returns:
+        An RGBA PIL image whose alpha channel is the predicted foreground
+        mask, or ``None`` when *input_image* is ``None``.
+
+    Raises:
+        gr.Error: If any processing step fails. Gradio shows the message to
+            the user; returning a string to an image output would not.
+    """
     if input_image is None:
         return None
     try:
+        # Load model
+        model, device = load_model()
+        # Work on a plain RGB copy so RGBA, greyscale or palette uploads have
+        # exactly three channels when the alpha channel is attached below.
+        if not isinstance(input_image, Image.Image):
+            input_image = Image.fromarray(np.asarray(input_image))
+        rgb_image = input_image.convert("RGB")
+        # Preprocess image
+        image_tensor, resized_size, original_size = preprocess_image(rgb_image)
+        # Run inference
+        mask = inference(model, image_tensor, device)
+        # Post-process mask
+        mask = postprocess_output(mask, original_size, resized_size)
+        # Apply mask to create transparent image: RGB channels plus mask as alpha
+        rgba = np.dstack((np.array(rgb_image), mask)).astype(np.uint8)
+        # Convert to PIL Image
+        output_image = Image.fromarray(rgba, mode='RGBA')
+        return output_image
     except Exception as e:
+        # The output component is an image, so surface failures as a UI error
+        # instead of returning a string it cannot display.
+        raise gr.Error(f"Error: {str(e)}") from e
+# Set up Gradio Blocks interface: two Markdown headers, then an input and an
+# output image component inside one row, then the button that runs the removal.
+with gr.Blocks(title="DIS Background Remover") as demo:
+    gr.Markdown("## DIS Background Remover")
+    gr.Markdown("Upload an image to remove its background using the IS-Net model from xuebinqin/DIS.")
     with gr.Row():
+        input_image = gr.Image(type="pil", label="Upload Image")
+        output_image = gr.Image(type="pil", label="Image with Background Removed")
+    submit_btn = gr.Button("Remove Background")
+    # Clicking the button passes the uploaded image to remove_background and
+    # sends its return value to the output image component.
     submit_btn.click(
         fn=remove_background,
         inputs=input_image,
         outputs=output_image
     )
+# Launch the app (only when this file is executed as a script)
 if __name__ == "__main__":
+    demo.launch()