Spaces:

NeuralFalcon
/

Remove-Background

Runtime error

App Files Files Community

NeuralFalcon committed on Jul 25, 2025

Commit

d62e696

verified ·

1 Parent(s): 863515e

Upload 7 files

Browse files

Files changed (7) hide show

app.py +86 -0
helper.py +376 -0
packages.txt +1 -0
pixelwise_estimator.py +114 -0
requirement.txt +7 -0
soft_foreground_segmenter.py +78 -0
utils.py +163 -0

app.py ADDED Viewed

	@@ -0,0 +1,86 @@

+import gradio as gr
+from gradio_imageslider import ImageSlider
+from helper import create_transparent_foreground,remove_background_batch_images,remove_background_from_video
+from soft_foreground_segmenter import SoftForegroundSegmenter
+# Relative path of the ONNX foreground-segmentation weights (resolved against the working directory).
+foreground_model = "foreground-segmentation-model-vitl16_384.onnx"
+# A single segmenter is created at import time and shared by every handler in this module.
+foreground_segmenter = SoftForegroundSegmenter(onnx_model=foreground_model)
+def process_image(image_path):
+    original, transparent, output_image_path = create_transparent_foreground(image_path,foreground_segmenter)
+    return (original, transparent), output_image_path
+def ui1():
+  with gr.Blocks() as demo:
+      gr.Markdown("## 🪄 Background Remove From Image")
+      with gr.Row():
+        with gr.Column():
+          image_input = gr.Image(type="filepath", label="Upload Image")
+          btn = gr.Button("Remove Background")
+        with gr.Column():
+          image_slider = ImageSlider(label="Before vs After",position=0.5)
+          save_path_box = gr.File(label="Download Transparent Image")
+      btn.click(
+          fn=process_image,
+          inputs=image_input,
+          outputs=[image_slider, save_path_box]
+      )
+      gr.Examples(
+        examples=[["./assets/cat.png"],["./assets/girl.jpg"],["./assets/dog.jpg"]],
+        inputs=[image_input],
+        outputs=[image_slider, save_path_box],
+        fn=process_image,
+        cache_examples=True,
+    )
+  return demo
+def process_uploaded_images(uploaded_images):
+    return remove_background_batch_images(uploaded_images,foreground_segmenter)
+def ui2():
+  with gr.Blocks() as demo:
+      gr.Markdown("## 🪄 Background Remover From Bulk Images")
+      with gr.Row():
+          with gr.Column():
+              image_input = gr.File(file_types=["image"], file_count="multiple", label="Upload Multiple Images")
+              submit_btn = gr.Button("Remove Backgrounds")
+          with gr.Column():
+              zip_output = gr.File(label="Download ZIP")
+      submit_btn.click(fn=process_uploaded_images, inputs=image_input, outputs=zip_output)
+  return demo
+def process_video(video_file):
+    output_path = remove_background_from_video(video_file, foreground_segmenter)
+    return output_path  # should be absolute or relative path to processed video
+def ui3():
+  # --- Gradio Interface ---
+  with gr.Blocks() as demo:
+      gr.Markdown("## 🎥 Remove Background From Video")
+      with gr.Row():
+          with gr.Column():
+              input_video = gr.Video(label="Upload Video (.mp4)")
+              run_btn = gr.Button("Remove Background")
+          with gr.Column():
+              output_video = gr.Video(label="Green Screen Video")
+      run_btn.click(fn=process_video, inputs=input_video, outputs=output_video)
+    #   gr.Examples(
+    #     examples=[["./assets/video.mp4"]],
+    #     inputs=[input_video],
+    #     outputs=[output_video],
+    #     fn=process_video,
+    #     cache_examples=True,
+    # )
+  return demo
+# Build each tab's layout once, at import time.
+demo1=ui1()
+demo2=ui2()
+demo3=ui3()
+# Combine the three layouts into one tabbed app; the second list holds the tab titles in the same order.
+demo = gr.TabbedInterface([demo1, demo2,demo3],["Background Remove From Image","Background Remover From Bulk Images","Remove Background From Video"],title="Microsoft DAViD Background Remove")
+# Enable the request queue and start the server with debug output and a public share link requested.
+demo.queue().launch(debug=True, share=True)

helper.py ADDED Viewed

	@@ -0,0 +1,376 @@

+import cv2
+import numpy as np
+import os
+import shutil
+import subprocess
+import glob
+from tqdm.auto import tqdm
+import uuid
+import re
+from zipfile import ZipFile
+# Module-wide switch forwarded as use_gpu to the ffmpeg helpers by remove_background_from_video.
+gpu = False
+# Create the output directory as soon as the module is imported.
+os.makedirs("./results",exist_ok=True)
+def apply_green_screen(image_path, save_path,foreground_segmenter):
+    """
+    Replaces the background of the input image with green using a segmentation model.
+    Args:
+        image_path (str): Path to the input image.
+        segmenter (SoftForegroundSegmenter): Initialized segmentation model.
+        save_path (str, optional): If provided, saves the result to this path.
+    Returns:
+        np.ndarray: The green screen composited image.
+    """
+    # Load image with alpha if available
+    image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
+    if image is None:
+        raise FileNotFoundError(f"Image not found: {image_path}")
+    # Remove transparency if present
+    if image.shape[2] == 4:
+        image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)
+    # Convert to RGB for the model
+    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    # Get segmentation mask
+    mask = foreground_segmenter.estimate_foreground_segmentation(image_rgb)
+    # Normalize and convert mask to 0-255 uint8
+    if mask.max() <= 1.0:
+        mask = (mask * 255).astype(np.uint8)
+    else:
+        mask = mask.astype(np.uint8)
+    if mask.ndim == 2:
+        mask_gray = mask
+    elif mask.shape[2] == 1:
+        mask_gray = mask[:, :, 0]
+    else:
+        mask_gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
+    _, binary_mask = cv2.threshold(mask_gray, 128, 255, cv2.THRESH_BINARY)
+    # Create green background
+    green_bg = np.full_like(image_rgb, (0, 255, 0), dtype=np.uint8)
+    # Create 3-channel mask
+    mask_3ch = cv2.cvtColor(binary_mask, cv2.COLOR_GRAY2BGR)
+    # Composite: foreground from image, background as green
+    output_rgb = np.where(mask_3ch == 255, image_rgb, green_bg)
+    # Convert back to BGR for OpenCV
+    output_bgr = cv2.cvtColor(output_rgb, cv2.COLOR_RGB2BGR)
+    # Save if path is given
+    if save_path:
+        cv2.imwrite(save_path, output_bgr)
+    return output_bgr
+def create_transparent_foreground(image_path,foreground_segmenter):
+    uid = uuid.uuid4().hex[:8].upper()
+    base_name = os.path.splitext(os.path.basename(image_path))[0]
+    base_name = re.sub(r'[^a-zA-Z\s]', '', base_name)
+    base_name = base_name.strip().replace(" ", "_").replace("__","_")
+    save_path = f"./results/{base_name}_{uid}.png"
+    save_path = os.path.abspath(save_path)
+    image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
+    if image is None:
+        raise FileNotFoundError(f"Image not found: {image_path}")
+    if image.shape[2] == 4:
+        image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)
+    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    mask = foreground_segmenter.estimate_foreground_segmentation(image_rgb)
+    if mask.max() <= 1.0:
+        mask = (mask * 255).astype(np.uint8)
+    else:
+        mask = mask.astype(np.uint8)
+    if mask.ndim == 3 and mask.shape[2] == 3:
+        mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
+    _, alpha = cv2.threshold(mask, 128, 255, cv2.THRESH_BINARY)
+    rgba_image = np.dstack((image_rgb, alpha))
+    cv2.imwrite(save_path, cv2.cvtColor(rgba_image, cv2.COLOR_RGBA2BGRA))
+    return image_rgb, rgba_image, save_path
+def remove_background_batch_images(img_list, foreground_segmenter):
+    # Create unique temp directory
+    uid = uuid.uuid4().hex[:8].upper()
+    temp_dir = os.path.abspath(f"./results/bg_removed_{uid}")
+    os.makedirs(temp_dir, exist_ok=True)
+    # Process each image
+    for image_path in tqdm(img_list, desc="Removing Backgrounds"):
+        _, _, save_path = create_transparent_foreground(image_path, foreground_segmenter)
+        shutil.move(save_path, os.path.join(temp_dir, os.path.basename(save_path)))
+    # Create zip file
+    zip_path = f"{temp_dir}.zip"
+    with ZipFile(zip_path, 'w') as zipf:
+        for root, _, files in os.walk(temp_dir):
+            for file in files:
+                file_path = os.path.join(root, file)
+                arcname = os.path.relpath(file_path, start=temp_dir)
+                zipf.write(file_path, arcname=arcname)
+    # shutil.rmtree(temp_dir)
+    return os.path.abspath(zip_path)
+def get_sorted_paths(directory, extension="png"):
+    """
+    Returns full paths of all images with the given extension, sorted by filename (without extension).
+    """
+    extension = extension.lstrip(".").lower()
+    pattern = os.path.join(directory, f"*.{extension}")
+    files = glob.glob(pattern)
+    files.sort(key=lambda x: int(os.path.splitext(os.path.basename(x))[0]))
+    return files
+def extract_all_frames_ffmpeg_gpu(video_path, output_dir="frames", extension="png", use_gpu=True):
+    """
+    Extracts all frames from a video using ffmpeg, with optional GPU acceleration.
+    Returns a sorted list of full paths to the extracted frames.
+    """
+    if os.path.exists(output_dir):
+        shutil.rmtree(output_dir)
+    os.makedirs(output_dir, exist_ok=True)
+    extension = extension.lstrip(".")
+    output_pattern = os.path.join(output_dir, f"%05d.{extension}")
+    command = [
+        "ffmpeg", "-i", video_path, output_pattern
+    ]
+    if use_gpu:
+        command.insert(1, "cuda")
+        command.insert(1, "-hwaccel")
+    print("Running command:", " ".join(command))
+    subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+    return get_sorted_paths(output_dir, extension)
+def green_screen_batch(frames, foreground_segmenter,output_dir="green_screen_frames"):
+    """
+    Applies green screen background to a batch of frames and saves the results.
+    Args:
+        frames (List[str]): List of image paths.
+        output_dir (str): Directory to save green-screened output.
+    """
+    if os.path.exists(output_dir):
+        shutil.rmtree(output_dir)
+    os.makedirs(output_dir, exist_ok=True)
+    green_screen_frames=[]
+    for frame in tqdm(frames, desc="Processing green screen frames"):
+        save_image_path=os.path.join(output_dir, os.path.basename(frame))
+        result = apply_green_screen(
+            frame,
+            save_image_path,
+            foreground_segmenter
+        )
+        green_screen_frames.append(save_image_path)
+    return green_screen_frames
+def green_screen_video_maker(original_video, green_screen_frames, batch_size=100):
+    """
+    Creates video chunks from green screen frames based on original video's properties.
+    Args:
+        original_video (str): Path to the original video file (to read FPS, size).
+        green_screen_frames (List[str]): List of green screen frame paths.
+        batch_size (int): Number of frames per chunked video.
+    """
+    temp_folder = "temp_video"
+    if os.path.exists(temp_folder):
+        shutil.rmtree(temp_folder)
+    os.makedirs(temp_folder, exist_ok=True)
+    # Get video info from original video
+    cap = cv2.VideoCapture(original_video)
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    cap.release()
+    total_frames = len(green_screen_frames)
+    num_chunks = (total_frames + batch_size - 1) // batch_size  # Ceiling division
+    for chunk_idx in tqdm(range(num_chunks), desc="Processing video chunks"):
+        chunk_path = os.path.join(temp_folder, f"{chunk_idx+1}.mp4")
+        out = cv2.VideoWriter(chunk_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
+        start_idx = chunk_idx * batch_size
+        end_idx = min(start_idx + batch_size, total_frames)
+        for frame_path in green_screen_frames[start_idx:end_idx]:
+            frame = cv2.imread(frame_path)
+            frame = cv2.resize(frame, (width, height))  # Ensure matching resolution
+            out.write(frame)
+        out.release()
+def merge_video_chunks(output_path="final_video.mp4", temp_folder="temp_video", use_gpu=True):
+    """
+    Merges all video chunks from temp_folder into a final single video.
+    """
+    os.makedirs("./results", exist_ok=True)
+    output_path = f"../results/{output_path}"  # relative to temp_folder
+    file_list_path = os.path.join(temp_folder, "chunks.txt")
+    chunk_files=sorted(
+            [f for f in os.listdir(temp_folder) if f.lower().endswith("mp4")],
+            key=lambda x: int(os.path.splitext(x)[0])
+        )
+    with open(file_list_path, "w") as f:
+        for chunk in chunk_files:
+            f.write(f"file '{chunk}'\n")  # ✅ No './' prefix
+    ffmpeg_cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", "chunks.txt"]
+    if use_gpu:
+        ffmpeg_cmd += ["-c:v", "h264_nvenc", "-preset", "fast"]
+    else:
+        ffmpeg_cmd += ["-c", "copy"]
+    ffmpeg_cmd.append(output_path)
+    # ✅ Run from inside temp_folder, so chunks.txt and mp4 files are local
+    subprocess.run(ffmpeg_cmd, cwd=temp_folder, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+def extract_audio_from_video(video_path, output_audio_path="output_audio.wav", format="wav", sample_rate=16000, channels=1):
+    """
+    Extracts audio from a video file using ffmpeg.
+    Args:
+        video_path (str): Path to the input video file.
+        output_audio_path (str): Path to save the extracted audio (e.g., .wav or .mp3).
+        format (str): 'wav' or 'mp3'
+        sample_rate (int): Sampling rate in Hz (e.g., 16000 for ASR models)
+        channels (int): Number of audio channels (1=mono, 2=stereo)
+    """
+    # Ensure the output directory exists
+    os.makedirs(os.path.dirname(output_audio_path) or ".", exist_ok=True)
+    # Build ffmpeg command
+    if format.lower() == "wav":
+        command = [
+            "ffmpeg", "-y",               # Overwrite output
+            "-i", video_path,            # Input video
+            "-vn",                       # Disable video
+            "-ac", str(channels),        # Audio channels (1 = mono)
+            "-ar", str(sample_rate),     # Audio sample rate
+            "-acodec", "pcm_s16le",      # WAV codec
+            output_audio_path
+        ]
+    elif format.lower() == "mp3":
+        command = [
+            "ffmpeg", "-y",
+            "-i", video_path,
+            "-vn",
+            "-ac", str(channels),
+            "-ar", str(sample_rate),
+            "-acodec", "libmp3lame",     # MP3 codec
+            output_audio_path
+        ]
+    else:
+        raise ValueError("Unsupported format. Use 'wav' or 'mp3'.")
+    # Run command silently
+    subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+def add_audio(video_path, audio_path, output_path, use_gpu=False):
+    """
+    Replaces the audio of a video with a new audio track.
+    Args:
+        video_path (str): Path to the video file.
+        audio_path (str): Path to the audio file.
+        output_path (str): Path where the final video will be saved.
+        use_gpu (bool): If True, use GPU-accelerated video encoding.
+    """
+    os.makedirs(os.path.dirname(output_path), exist_ok=True)
+    command = [
+        "ffmpeg", "-y",                     # Overwrite without asking
+        "-i", video_path,                  # Input video
+        "-i", audio_path,                  # Input audio
+        "-map", "0:v:0",                   # Use video from first input
+        "-map", "1:a:0",                   # Use audio from second input
+        "-shortest"                        # Trim to the shortest stream (audio/video)
+    ]
+    if use_gpu:
+        command += ["-c:v", "h264_nvenc", "-preset", "fast"]
+    else:
+        command += ["-c:v", "copy"]
+    command += ["-c:a", "aac", "-b:a", "192k", output_path]
+    subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+def remove_background_from_video(uploaded_video_path,foreground_segmenter):
+    # 🔁 Generate a single UUID to use for all related files
+    uid = uuid.uuid4().hex[:8].upper()
+    # Define all output paths using that UUID
+    base_name = os.path.splitext(os.path.basename(uploaded_video_path))[0]
+    base_name = re.sub(r'[^a-zA-Z\s]', '', base_name)
+    base_name = base_name.strip().replace(" ", "_")
+    temp_video_path = f"./results/{base_name}_chunks_{uid}.mp4"
+    audio_path = f"./results/{base_name}_audio_{uid}.wav"
+    final_output_path = f"./results/{base_name}_final_{uid}.mp4"
+    # Step 1: Extract frames
+    frames = extract_all_frames_ffmpeg_gpu(
+        video_path=uploaded_video_path,
+        output_dir="frames",
+        extension="png",
+        use_gpu=gpu
+    )
+    # Step 2: Remove background (green screen)
+    green_screen_frames = green_screen_batch(frames,foreground_segmenter)
+    # Step 3: Rebuild video from frames
+    green_screen_video_maker(uploaded_video_path, green_screen_frames, batch_size=100)
+    # Step 4: Merge video chunks
+    merge_video_chunks(output_path=os.path.basename(temp_video_path), use_gpu=gpu)
+    # Step 5: Extract original audio
+    extract_audio_from_video(uploaded_video_path, output_audio_path=audio_path)
+    # Step 6: Add audio back
+    add_audio(
+        video_path=temp_video_path,
+        audio_path=audio_path,
+        output_path=final_output_path,
+        use_gpu=True
+    )
+    return os.path.abspath(final_output_path)

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ ffmpeg

pixelwise_estimator.py ADDED Viewed

	@@ -0,0 +1,114 @@

+# Copied From https://github.com/microsoft/DAViD/blob/main/runtime/pixelwise_estimator.py
+"""Runtime core for pixelwise estimators.
+Copyright (c) Microsoft Corporation.
+MIT License
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+from pathlib import Path
+from typing import Optional, Union
+import numpy as np
+from onnxruntime import InferenceSession
+from utils import ONNX_EP, ModelNotFoundError, prepare_image_for_model, preprocess_img
+class RuntimeSession(InferenceSession):
+    """The runtime session."""
+    def __init__(self, onnx_model: Union[str, Path], providers: Optional[list[str]] = None) -> None:
+        """Create a runtime session.
+        Args:
+            onnx_model: The path to the onnx model.
+            providers: Optional list of ONNX execution providers to use, defaults to [GPU, CPU].
+        """
+        super().__init__(str(onnx_model), providers=providers or ONNX_EP)
+        self.onnx_model_path: Path = Path(onnx_model)
+    @property
+    def input_name(self) -> str:
+        """Get the name of the input tensor."""
+        return self.get_inputs()[0].name
+    def __call__(self, x: np.ndarray) -> list[np.ndarray]:
+        """Run the model on the input tensor."""
+        x = x.astype(np.float32)
+        return self.run(None, {self.input_name: x})
+class PixelwiseEstimator:
+    """Given an input image, estimates the pixelwise (dense) output (e.g., normal map, depth map, etc.)."""
+    def __init__(self, onnx_model: Union[str, Path], providers: Optional[list[str]] = None) -> None:
+        """Creates a pixelwise estimator.
+        Arguments:
+            onnx_model: Path to an ONNX model.
+            providers: Optional list of ONNX execution providers to use, defaults to [GPU, CPU].
+        Raises:
+            TypeError: If onnx_model is not a string or Path.
+            ModelNotFoundError: If the model file does not exist.
+        """
+        if not isinstance(onnx_model, (str, Path)):
+            raise TypeError(f"onnx_model should be a string or Path, got {type(onnx_model)}")
+        onnx_model = Path(onnx_model)
+        if not onnx_model.exists():
+            raise ModelNotFoundError(f"model {onnx_model} does not exist")
+        self.onnx_model = onnx_model
+        # Side length of the square image handed to prepare_image_for_model.
+        self.roi_size = 512
+        self.onnx_sess = RuntimeSession(str(onnx_model), providers=providers)
+    @staticmethod
+    def inference(input_img: np.ndarray, onnx_sess: RuntimeSession) -> list[np.ndarray]:
+        """Predict the pixelwise (dense) map given an input image.
+        Args:
+            input_img: Input image in HWC layout.
+            onnx_sess: ONNX inference session.
+        Returns:
+            The list of output arrays returned by the session's run call.
+        """
+        input_tensor = onnx_sess.get_inputs()[0]
+        input_name = input_tensor.name
+        input_shape = input_tensor.shape
+        # The reshape uses the model's declared input shape for every non-batch axis.
+        input_img = np.transpose(input_img, (2, 0, 1)).reshape(1, *input_shape[1:])  # HWC to BCHW
+        pred_onnx = onnx_sess.run(None, {input_name: input_img.astype(np.float32)})
+        return pred_onnx
+    def _estimate_dense_map(self, image: np.ndarray) -> tuple[list[np.ndarray], dict]:
+        """Estimating dense maps from image input.
+        Args:
+            image: Three-channel image as a numpy array.
+        Returns:
+            The raw model outputs and the metadata dict from prepare_image_for_model.
+        Raises:
+            TypeError: If image is not a numpy array.
+        """
+        if not isinstance(image, np.ndarray):
+            raise TypeError(f"Image should be a numpy array, got {type(image)}")
+        # preprocess_img converts to float32 in [0, 1]; it documents its input as BGR.
+        image_bgr = preprocess_img(image)
+        processed_image, metadata = prepare_image_for_model(image_bgr, self.roi_size)
+        output = self.inference(processed_image, self.onnx_sess)
+        return output, metadata

requirement.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+numpy==2.2.6
+onnx==1.18.0
+onnxruntime-gpu==1.22.0
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+gradio>=5.38.2
+gradio_imageslider==0.0.20

soft_foreground_segmenter.py ADDED Viewed

	@@ -0,0 +1,78 @@

+# Copied From https://github.com/microsoft/DAViD/blob/main/runtime/soft_foreground_segmenter.py
+"""This module provides a SoftForegroundSegmenter which segments the foreground human subjects from the background.
+Copyright (c) Microsoft Corporation.
+MIT License
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+from pathlib import Path
+from typing import Optional, Union
+import cv2
+import numpy as np
+from pixelwise_estimator import PixelwiseEstimator
+from utils import composite_model_output_to_image
+class SoftForegroundSegmenter(PixelwiseEstimator):
+    """Estimates the soft foreground segmentation mask of human in an image."""
+    def __init__(
+        self,
+        onnx_model: Union[str, Path],
+        providers: Optional[list[str]] = None,
+        binarization_threshold: Optional[float] = None,
+    ):
+        """Creates a soft foreground segmenter to segment the foreground human subjects in an image.
+        Arguments:
+            onnx_model: A path to an ONNX model.
+            providers: Optional list of ONNX execution providers to use, defaults to [GPU, CPU].
+            binarization_threshold: Threshold above which the mask is considered foreground. When None, the mask is returned as is.
+        Raises:
+            TypeError: if onnx_model is not a string or Path.
+            ModelNotFoundError: if the model file does not exist.
+        """
+        super().__init__(
+            onnx_model,
+            providers=providers,
+        )
+        self.binarization_threshold = binarization_threshold
+    def estimate_foreground_segmentation(self, image: np.ndarray) -> np.ndarray:
+        """Predict the soft foreground/background segmentation given input image."""
+        mask, metadata = self._estimate_dense_map(image)
+        mask = mask[0][0]
+        mask = np.transpose(mask, (1, 2, 0))
+        # post_process to get the final segmentation mask and composite it onto the original size
+        segmented_image = composite_model_output_to_image(mask, metadata, interp_mode=cv2.INTER_CUBIC)
+        # clip the mask to [0, 1]
+        segmented_image = np.clip(segmented_image, 0, 1)
+        # Apply threshold if binarization_threshold is set
+        if self.binarization_threshold:
+            return ((segmented_image > self.binarization_threshold) * 1).astype(np.uint8)
+        return segmented_image

utils.py ADDED Viewed

	@@ -0,0 +1,163 @@

+#Copied From https://github.com/microsoft/DAViD/blob/main/runtime/utils.py
+"""Utility classes and functions for image processing and ROI operations.
+Copyright (c) Microsoft Corporation.
+MIT License
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+import cv2
+import numpy as np
+# Default ONNX Runtime execution providers, listed CUDA first and CPU second.
+ONNX_EP = ["CUDAExecutionProvider", "CPUExecutionProvider"]
+# Largest values representable by uint8 / uint16; used to scale integer images to [0, 1].
+UINT8_MAX = np.iinfo(np.uint8).max
+UINT16_MAX = np.iinfo(np.uint16).max
+class ImageFormatError(Exception):
+    """Exception raised for invalid image formats."""
+    pass
+class ModelNotFoundError(Exception):
+    """Exception raised when model file is not found."""
+    pass
+def preprocess_img(img: np.ndarray) -> np.ndarray:
+    """Preprocesses a BGR image for DNN. Turning to float if not already and normalizing to [0, 1].
+    Normalization of uint images is done by dividing by brightest possible value (e.g. 255 for uint8).
+    Arguments:
+        img: The image to preprocess, can be uint8, uint16, float16, float32 or float64.
+    Returns:
+        The preprocessed image in np.float32 format.
+    Raises:
+        ImageFormatError: If the image is not three channels or not uint8, uint16, float16, float32 or float64.
+    """
+    if img.ndim != 3 or img.shape[2] != 3:
+        raise ImageFormatError("image must be 3 channels, got shape: {img.shape}")
+    if img.dtype not in [np.uint8, np.uint16, np.float16, np.float32, np.float64]:  # noqa: PLR6201
+        raise ImageFormatError("image must be uint8 or float16, float32, float64")
+    if img.dtype == np.uint8:
+        img = img.astype(np.float32) / UINT8_MAX
+    if img.dtype == np.uint16:
+        img = img.astype(np.float32) / UINT16_MAX
+    img = np.clip(img, 0, 1)
+    return img.astype(np.float32)
+def prepare_image_for_model(image: np.ndarray, roi_size: int = 512) -> tuple[np.ndarray, dict]:
+    """Prepare any input image for model inference by resizing to roi_size x roi_size.
+    This function takes an image of any size and prepares it for a model that expects
+    a square input (e.g., 512x512). It handles aspect ratio preservation by padding
+    with replicated border values.
+    Args:
+        image: Input image of any size
+        roi_size: Target size for the model (default 512)
+    Returns:
+        tuple: (preprocessed_image, metadata_dict)
+            - preprocessed_image: Image resized to roi_size x roi_size
+            - metadata_dict: Contains information needed to composite back to original size
+    """
+    # Get original shape
+    original_shape = image.shape[:2]  # (height, width)
+    # Calculate padding to make the image square
+    if original_shape[0] < original_shape[1]:
+        pad_h = (original_shape[1] - original_shape[0]) // 2
+        pad_w = 0
+        pad_h_extra = original_shape[1] - original_shape[0] - pad_h
+        pad_w_extra = 0
+    elif original_shape[0] > original_shape[1]:
+        pad_w = (original_shape[0] - original_shape[1]) // 2
+        pad_h = 0
+        pad_w_extra = original_shape[0] - original_shape[1] - pad_w
+        pad_h_extra = 0
+    else:
+        pad_h = pad_w = pad_h_extra = pad_w_extra = 0
+    # Pad the image to make it square
+    padded_image = cv2.copyMakeBorder(
+        image,
+        top=pad_h,
+        bottom=pad_h_extra,
+        left=pad_w,
+        right=pad_w_extra,
+        borderType=cv2.BORDER_REPLICATE,
+    )
+    square_shape = padded_image.shape[:2]
+    while padded_image.shape[1] > roi_size * 3 and padded_image.shape[0] > roi_size * 3:
+        padded_image = cv2.pyrDown(padded_image)
+    resized_image = cv2.resize(padded_image, (roi_size, roi_size), interpolation=cv2.INTER_LINEAR)
+    metadata = {
+        "original_shape": original_shape,
+        "square_shape": square_shape,
+        "original_padding": (pad_h, pad_w, pad_h_extra, pad_w_extra),
+    }
+    return resized_image, metadata
+def composite_model_output_to_image(
+    model_output: np.ndarray, metadata: dict, interp_mode: int = cv2.INTER_NEAREST
+) -> np.ndarray:
+    """Composite model output back to the original image size.
+    Takes the model output (which should be roi_size x roi_size) and composites it
+    back to the original image dimensions using the metadata from prepare_image_for_model.
+    Args:
+        model_output: Output from the model (roi_size x roi_size)
+        metadata: Metadata dict returned from prepare_image_for_model
+        interp_mode: Interpolation mode for resizing (default INTER_NEAREST for discrete outputs)
+    Returns:
+        np.ndarray: Output composited to original image size
+    """
+    pad_h, pad_w, pad_h_extra, pad_w_extra = metadata["original_padding"]
+    # Resize the entire model output back to the square shape
+    square_shape = metadata["square_shape"]
+    resized_to_square = cv2.resize(model_output, (square_shape[1], square_shape[0]), interpolation=interp_mode)
+    # Remove the padding to get back to original dimensions
+    if pad_h > 0 or pad_h_extra > 0:
+        final_output = resized_to_square[pad_h : square_shape[0] - pad_h_extra, :]
+    elif pad_w > 0 or pad_w_extra > 0:
+        final_output = resized_to_square[:, pad_w : square_shape[1] - pad_w_extra]
+    else:
+        final_output = resized_to_square
+    return final_output