Spaces:

okupyn
/

s3od

Runtime error

App Files Files Community

okupyn commited on Oct 27, 2025

Commit

4993e87

verified ·

1 Parent(s): 6268a55

Upload app.py

Browse files

Files changed (1) hide show

app.py +144 -47

app.py CHANGED Viewed

@@ -1,12 +1,31 @@
 import gradio as gr
 import numpy as np
-from PIL import Image
 from fire import Fire
 from s3od import BackgroundRemoval
 from s3od.visualizer import visualize_removal
-detector = BackgroundRemoval()
 VISUALIZATION_METHODS = {
     'Transparent Background': 'transparent',
@@ -16,73 +35,151 @@ VISUALIZATION_METHODS = {
 }
-def process_image(image, method, threshold):
     if image is None:
-        return None
     result = detector.remove_background(image, threshold=threshold)
     if method == 'transparent':
-        return result.rgba_image
     elif method == 'white':
-        return visualize_removal(image, result, background_color=(255, 255, 255))
     elif method == 'green':
-        return visualize_removal(image, result, background_color=(0, 255, 0))
     elif method == 'mask':
         mask_vis = (result.predicted_mask * 255).astype(np.uint8)
-        return Image.fromarray(mask_vis, mode='L')
-    return result.rgba_image
-iface = gr.Interface(
-    fn=process_image,
-    inputs=[
-        gr.Image(type="numpy", label="Upload an Image"),
-        gr.Radio(
-            list(VISUALIZATION_METHODS.keys()),
-            label="Output Format",
-            value='Transparent Background'
-        ),
-        gr.Slider(
-            minimum=0.0,
-            maximum=1.0,
-            value=0.5,
-            step=0.05,
-            label="Mask Threshold"
-        )
-    ],
-    outputs=gr.Image(type="pil", label="Result"),
-    title="Demo: S3OD - Synthetic Salient Object Detection",
-    description="""
     Upload an image to remove its background using **S3OD**!
     S3OD is trained on a large-scale fully synthetic dataset (140K+ images) generated with diffusion models.
-    Despite being trained only on synthetic data, it achieves state-of-the-art performance on real-world images.
-    The model uses a DPT-based architecture with DINOv3 vision transformer backbone for robust salient object detection
-    and can process images of any size. Choose from four visualization methods: transparent background (RGBA),
-    white background, green background (chroma key), or mask only.
     **Key Features:**
-    - Single-step background removal
     - Multi-mask prediction with IoU scoring
-    - Adjustable threshold for fine-tuning
     - Works on any image resolution
-    Ideal for applications in e-commerce, content creation, photo editing, and computer vision research.
-    📄 [Paper](https://arxiv.org/abs/XXXX.XXXXX) | 💻 [GitHub](https://github.com/KupynOrest/s3od) | 🤗 [Model](https://huggingface.co/okupyn/s3od) | 🗂️ [Dataset](https://huggingface.co/datasets/okupyn/s3od_dataset)
-    """,
-    allow_flagging='never',
-    examples=[
-        # Add example images here when available
-    ]
-)
 def main(server_name="0.0.0.0", server_port=7860, share=False):
-    iface.launch(
         server_name=server_name,
         server_port=server_port,
         share=share

 import gradio as gr
 import numpy as np
+from PIL import Image, ImageDraw, ImageFont
 from fire import Fire
 from s3od import BackgroundRemoval
 from s3od.visualizer import visualize_removal
+# Model variants mapping
+MODEL_VARIANTS = {
+    'General (Synth + Real)': 'okupyn/s3od',
+    'Synthetic Only': 'okupyn/s3od-synth',
+    'DIS-tuned': 'okupyn/s3od-dis',
+    'SOD-tuned': 'okupyn/s3od-sod',
+}
+# Cache loaded models to avoid reloading
+_model_cache = {}
+def get_detector(model_name):
+    """Get or load detector for the specified model."""
+    if model_name not in _model_cache:
+        print(f"Loading model: {model_name}")
+        _model_cache[model_name] = BackgroundRemoval(model_id=model_name)
+    return _model_cache[model_name]
+# Load default model
+detector = get_detector('okupyn/s3od')
 VISUALIZATION_METHODS = {
     'Transparent Background': 'transparent',
 }
+def compute_mask_iou(mask1, mask2):
+    """Compute IoU between two masks."""
+    intersection = np.logical_and(mask1 > 0.5, mask2 > 0.5).sum()
+    union = np.logical_or(mask1 > 0.5, mask2 > 0.5).sum()
+    return intersection / (union + 1e-6)
+def is_ambiguous(all_masks, threshold=0.8):
+    """Check if prediction is ambiguous based on mask IoU."""
+    if len(all_masks) < 2:
+        return False
+    # Compute IoU between all pairs
+    for i in range(len(all_masks)):
+        for j in range(i + 1, len(all_masks)):
+            iou = compute_mask_iou(all_masks[i], all_masks[j])
+            if iou < threshold:
+                return True
+    return False
+def create_masks_grid(all_masks, all_ious, image_shape):
+    """Create a grid showing all 3 masks side by side."""
+    h, w = image_shape[:2]
+    num_masks = len(all_masks)
+    # Create grid image
+    grid_w = w * num_masks
+    grid_h = h
+    grid = Image.new('L', (grid_w, grid_h), color=0)
+    for idx, (mask, iou) in enumerate(zip(all_masks, all_ious)):
+        # Convert mask to image
+        mask_img = Image.fromarray((mask * 255).astype(np.uint8), mode='L')
+        # Paste into grid
+        grid.paste(mask_img, (idx * w, 0))
+    return grid
+def process_image(image, model_key, method_key, threshold):
     if image is None:
+        return None, None, None
+    # Get the appropriate model
+    model_id = MODEL_VARIANTS.get(model_key, 'okupyn/s3od')
+    detector = get_detector(model_id)
     result = detector.remove_background(image, threshold=threshold)
+    method = VISUALIZATION_METHODS.get(method_key, 'transparent')
+    # Generate main output
     if method == 'transparent':
+        main_output = result.rgba_image
     elif method == 'white':
+        main_output = visualize_removal(image, result, background_color=(255, 255, 255))
     elif method == 'green':
+        main_output = visualize_removal(image, result, background_color=(0, 255, 0))
     elif method == 'mask':
         mask_vis = (result.predicted_mask * 255).astype(np.uint8)
+        main_output = Image.fromarray(mask_vis, mode='L')
+    else:
+        main_output = result.rgba_image
+    # Create masks grid
+    masks_grid = create_masks_grid(result.all_masks, result.all_ious, image.shape)
+    # Check if ambiguous
+    ambiguous = is_ambiguous(result.all_masks)
+    ambiguity_label = "⚠️ Ambiguous prediction (IoU < 0.8 between masks)" if ambiguous else "✓ Clear prediction"
+    return main_output, masks_grid, ambiguity_label
+with gr.Blocks(title="S3OD - Synthetic Salient Object Detection") as demo:
+    gr.Markdown("""
+    # S3OD: Synthetic Salient Object Detection
     Upload an image to remove its background using **S3OD**!
     S3OD is trained on a large-scale fully synthetic dataset (140K+ images) generated with diffusion models.
+    The model uses a DPT-based architecture with DINOv3 vision transformer backbone for robust salient object detection.
+    **Model Variants:**
+    - **General (Synth + Real)**: Default model trained on synthetic data and fine-tuned on all real datasets (DUTS, DIS, HR-SOD)
+    - **Synthetic Only**: Trained exclusively on S3OD synthetic dataset
+    - **DIS-tuned**: Fine-tuned specifically for highly-accurate dichotomous segmentation
+    - **SOD-tuned**: Optimized for general salient object detection tasks
     **Key Features:**
+    - Single-step background removal with soft masks (smooth edges)
     - Multi-mask prediction with IoU scoring
+    - Ambiguity detection for uncertain predictions
     - Works on any image resolution
+    📄 [Paper](https://arxiv.org/abs/2510.21605) | 💻 [GitHub](https://github.com/KupynOrest/s3od) | 🤗 [Model](https://huggingface.co/okupyn/s3od) | 🗂️ [Dataset](https://huggingface.co/datasets/okupyn/s3od_dataset)
+    """)
+    with gr.Row():
+        with gr.Column():
+            input_image = gr.Image(type="numpy", label="Upload an Image")
+            model_dropdown = gr.Dropdown(
+                choices=list(MODEL_VARIANTS.keys()),
+                label="Model Variant",
+                value='General (Synth + Real)',
+                info="Choose the model variant trained on different datasets"
+            )
+            method_radio = gr.Radio(
+                list(VISUALIZATION_METHODS.keys()),
+                label="Output Format",
+                value='Transparent Background'
+            )
+            threshold_slider = gr.Slider(
+                minimum=0.0,
+                maximum=1.0,
+                value=0.5,
+                step=0.05,
+                label="Mask Threshold"
+            )
+            submit_btn = gr.Button("Remove Background", variant="primary")
+        with gr.Column():
+            output_image = gr.Image(type="pil", label="Result")
+            ambiguity_label = gr.Textbox(label="Prediction Quality", interactive=False)
+    with gr.Row():
+        masks_grid = gr.Image(type="pil", label="All 3 Predicted Masks (with IoU scores)")
+    submit_btn.click(
+        fn=process_image,
+        inputs=[input_image, model_dropdown, method_radio, threshold_slider],
+        outputs=[output_image, masks_grid, ambiguity_label]
+    )
+    # Also trigger on image upload
+    input_image.change(
+        fn=process_image,
+        inputs=[input_image, model_dropdown, method_radio, threshold_slider],
+        outputs=[output_image, masks_grid, ambiguity_label]
+    )
 def main(server_name="0.0.0.0", server_port=7860, share=False):
+    demo.launch(
         server_name=server_name,
         server_port=server_port,
         share=share