Spaces:

purkrmir
/

BBoxMaskPose_demo

Build error

App Files Files Community

Miroslav Purkrabek committed on Aug 14, 2025

Commit

7ebd068

1 Parent(s): e0c4840

first code with BMP demo

Browse files

Files changed (3) hide show

README.md +1 -1
app.py +181 -9
demo/demo_utils.py +37 -0

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: ProbPose Demo
 emoji: 🐠
 colorFrom: gray
 colorTo: yellow

 ---
+title: BBoxMaskPose Demo
 emoji: 🐠
 colorFrom: gray
 colorTo: yellow

app.py CHANGED Viewed

@@ -1,22 +1,194 @@
 import gradio as gr
 import spaces
 @spaces.GPU(duration=60)
 def process_image_with_BMP(
-    image,
-):
     """
-    Performs object detection using SAHI with a specified YOLOv11 model.
     Args:
-        image (PIL.Image.Image): The input image for detection.
     Returns:
-        tuple: A tuple containing two PIL.Image.Image objects:
-               - The image with standard YOLO inference results.
-               - The image with SAHI sliced YOLO inference results.
     """
-    return image, image
 with gr.Blocks() as app:

 import gradio as gr
 import spaces
+# Copyright (c) OpenMMLab. All rights reserved.
+"""
+BMP Demo script: sequentially runs detection, pose estimation, SAM-based mask refinement, and visualization.
+Usage:
+    python bmp_demo.py <config.yaml> <input_image> [--output-root <dir>]
+"""
+import os
+import shutil
+from argparse import ArgumentParser, Namespace
+from pathlib import Path
+import mmcv
+import mmengine
+import numpy as np
+import yaml
+from demo.demo_utils import DotDict, concat_instances, create_GIF, filter_instances, pose_nms, visualize_demo
+from demo.mm_utils import run_MMDetector, run_MMPose
+from mmdet.apis import init_detector
+from mmengine.logging import print_log
+from mmengine.structures import InstanceData
+from demo.sam2_utils import prepare_model as prepare_sam2_model
+from demo.sam2_utils import process_image_with_SAM
+from mmpose.apis import init_model as init_pose_estimator
+from mmpose.utils import adapt_mmdet_pipeline
+# Default thresholds
+DEFAULT_CAT_ID: int = 0
+DEFAULT_BBOX_THR: float = 0.3
+DEFAULT_NMS_THR: float = 0.3
+DEFAULT_KPT_THR: float = 0.3
+# Global models variable
+det_model = None
+pose_model = None
+sam2_model = None
+def _parse_yaml_config(yaml_path: Path) -> DotDict:
+    """
+    Load BMP configuration from a YAML file.
+    Args:
+        yaml_path (Path): Path to YAML config.
+    Returns:
+        DotDict: Nested config dictionary.
+    """
+    with open(yaml_path, "r") as f:
+        cfg = yaml.safe_load(f)
+    return DotDict(cfg)
+def load_models(bmp_config):
+    device = 'gpu'
+    global det_model, pose_model, sam2_model
+    # build detectors
+    det_model = init_detector(bmp_config.detector.det_config, bmp_config.detector.det_checkpoint, device='gpu')
+    det_model.cfg = adapt_mmdet_pipeline(det_model.cfg)
+    # build pose estimator
+    pose_model = init_pose_estimator(
+        bmp_config.pose_estimator.pose_config,
+        bmp_config.pose_estimator.pose_checkpoint,
+        device=device,
+        cfg_options=dict(model=dict(test_cfg=dict(output_heatmaps=False))),
+    )
+    sam2_model = prepare_sam2_model(
+        model_cfg=bmp_config.sam2.sam2_config,
+        model_checkpoint=bmp_config.sam2.sam2_checkpoint,
+    )
+    return det_model, pose_model, sam2_model
 @spaces.GPU(duration=60)
 def process_image_with_BMP(
+    img: np.ndarray
+) -> tuple[np.ndarray, np.ndarray]:
     """
+    Run the full BMP pipeline on a single image: detection, pose, SAM mask refinement, and visualization.
     Args:
+        args (Namespace): Parsed CLI arguments.
+        bmp_config (DotDict): Configuration parameters.
+        img_path (Path): Path to the input image.
+        detector: Primary MMDetection model.
+        detector_prime: Secondary MMDetection model for iterations.
+        pose_estimator: MMPose model for keypoint estimation.
+        sam2_model: SAM model for mask refinement.
     Returns:
+        InstanceData: Final merged detections and refined masks.
     """
+    bmp_config = _parse_yaml_config(Path("configs/bmp_D3.yaml"))
+    load_models(bmp_config)
+    img_for_detection = img.copy()
+    all_detections = None
+    for iteration in range(bmp_config.num_bmp_iters):
+        # Step 1: Detection
+        det_instances = run_MMDetector(
+            det_model,
+            img_for_detection,
+            det_cat_id=DEFAULT_CAT_ID,
+            bbox_thr=DEFAULT_BBOX_THR,
+            nms_thr=DEFAULT_NMS_THR,
+        )
+        if len(det_instances.bboxes) == 0:
+            continue
+        # Step 2: Pose estimation
+        pose_instances = run_MMPose(
+            pose_model,
+            img.copy(),
+            detections=det_instances,
+            kpt_thr=DEFAULT_KPT_THR,
+        )
+        # Restrict to first 17 COCO keypoints
+        pose_instances.keypoints = pose_instances.keypoints[:, :17, :]
+        pose_instances.keypoint_scores = pose_instances.keypoint_scores[:, :17]
+        pose_instances.keypoints = np.concatenate(
+            [pose_instances.keypoints, pose_instances.keypoint_scores[:, :, None]], axis=-1
+        )
+        # Step 3: Pose-NMS and SAM refinement
+        all_keypoints = (
+            pose_instances.keypoints
+            if all_detections is None
+            else np.concatenate([all_detections.keypoints, pose_instances.keypoints], axis=0)
+        )
+        all_bboxes = (
+            pose_instances.bboxes
+            if all_detections is None
+            else np.concatenate([all_detections.bboxes, pose_instances.bboxes], axis=0)
+        )
+        num_valid_kpts = np.sum(all_keypoints[:, :, 2] > bmp_config.sam2.prompting.confidence_thr, axis=1)
+        keep_indices = pose_nms(
+            DotDict({"confidence_thr": bmp_config.sam2.prompting.confidence_thr, "oks_thr": bmp_config.oks_nms_thr}),
+            image_kpts=all_keypoints,
+            image_bboxes=all_bboxes,
+            num_valid_kpts=num_valid_kpts,
+        )
+        keep_indices = sorted(keep_indices)  # Sort by original index
+        num_old_detections = 0 if all_detections is None else len(all_detections.bboxes)
+        keep_new_indices = [i - num_old_detections for i in keep_indices if i >= num_old_detections]
+        keep_old_indices = [i for i in keep_indices if i < num_old_detections]
+        if len(keep_new_indices) == 0:
+            print_log("No new instances passed pose NMS, skipping SAM refinement.", logger="current")
+            continue
+        # filter new detections and compute scores
+        new_dets = filter_instances(pose_instances, keep_new_indices)
+        new_dets.scores = pose_instances.keypoint_scores[keep_new_indices].mean(axis=-1)
+        old_dets = None
+        if len(keep_old_indices) > 0:
+            old_dets = filter_instances(all_detections, keep_old_indices)
+        new_detections = process_image_with_SAM(
+            DotDict(bmp_config.sam2.prompting),
+            img.copy(),
+            sam2_model,
+            new_dets,
+            old_dets if old_dets is not None else None,
+        )
+        # Merge detections
+        if all_detections is None:
+            all_detections = new_detections
+        else:
+            all_detections = concat_instances(all_detections, new_dets)
+        # Step 4: Visualization
+        img_for_detection, _, _ = visualize_demo(
+            img.copy(),
+            all_detections,
+        )
+    _, rtmdet_result, bmp_result = visualize_demo(
+        img.copy(),
+        all_detections,
+    )
+    return rtmdet_result, bmp_result
 with gr.Blocks() as app:

demo/demo_utils.py CHANGED Viewed

@@ -292,6 +292,43 @@ def visualize_itteration(
     return masked_out
 def create_GIF(
     img_path: Path,
     output_root: Path,

     return masked_out
+def visualize_demo(
+    img: np.ndarray, detections: Any,
+) -> Optional[np.ndarray]:
+    """
+    Generate and save visualization images for each BMP iteration.
+    Args:
+        img (np.ndarray): Original input image.
+        detections: InstanceData containing bboxes, scores, masks, keypoints.
+        iteration_idx (int): Current iteration index (0-based).
+        output_root (Path): Directory to save output images.
+        img_name (str): Base name of the image without extension.
+        with_text (bool): Whether to overlay text labels.
+    Returns:
+        Optional[np.ndarray]: The masked-out image if generated, else None.
+    """
+    bboxes = detections.bboxes
+    scores = detections.scores
+    pred_masks = detections.pred_masks
+    refined_masks = detections.refined_masks
+    keypoints = detections.keypoints
+    returns = []
+    for vis_def in [
+        {"type": "mask-out", "masks": refined_masks, "label": ""},
+        {"type": "bbox+mask", "masks": pred_masks, "label": "RTMDet-L"},
+        {"type": "mask+pose", "masks": refined_masks, "label": "BMP"},
+    ]:
+        vis_img, colors = _visualize_predictions(
+            img.copy(), bboxes, scores, vis_def["masks"], keypoints, vis_type=vis_def["type"], mask_is_binary=True
+        )
+        returns.append(vis_img)
+    return returns
 def create_GIF(
     img_path: Path,
     output_root: Path,