Spaces:

BiasLab2025
/

perception

Running on A10G

App Files Community

Zhen Ye committed on Jan 7

Commit

5658a25

1 Parent(s): af1a203

modified segmentation bbox display

Browse files

Files changed (1) hide show

inference.py +42 -13

inference.py CHANGED Viewed

@@ -44,8 +44,8 @@ def draw_boxes(
         cv2.rectangle(output, (x1, y1), (x2, y2), color, thickness=2)
         if label:
             font = cv2.FONT_HERSHEY_SIMPLEX
-            font_scale = 0.5
-            thickness = 1
             text_size, baseline = cv2.getTextSize(label, font, font_scale, thickness)
             text_w, text_h = text_size
             pad = 4
@@ -67,18 +67,15 @@ def draw_boxes(
     return output
-def draw_masks(frame: np.ndarray, masks: np.ndarray, alpha: float = 0.45) -> np.ndarray:
     output = frame.copy()
     if masks is None or len(masks) == 0:
         return output
-    colors = [
-        (255, 0, 0),
-        (0, 255, 0),
-        (0, 0, 255),
-        (255, 255, 0),
-        (0, 255, 255),
-        (255, 0, 255),
-    ]
     for idx, mask in enumerate(masks):
         if mask is None:
             continue
@@ -88,8 +85,39 @@ def draw_masks(frame: np.ndarray, masks: np.ndarray, alpha: float = 0.45) -> np.
             mask = cv2.resize(mask, (output.shape[1], output.shape[0]), interpolation=cv2.INTER_NEAREST)
         mask_bool = mask.astype(bool)
         overlay = np.zeros_like(output, dtype=np.uint8)
-        overlay[mask_bool] = colors[idx % len(colors)]
         output = cv2.addWeighted(output, 1.0, overlay, alpha, 0)
     return output
@@ -169,7 +197,8 @@ def infer_segmentation_frame(
     lock = _get_model_lock("segmenter", segmenter.name)
     with lock:
         result = segmenter.predict(frame, text_prompts=text_queries)
-    return draw_masks(frame, result.masks), result
 def extract_first_frame(video_path: str) -> Tuple[np.ndarray, float, int, int]:

         cv2.rectangle(output, (x1, y1), (x2, y2), color, thickness=2)
         if label:
             font = cv2.FONT_HERSHEY_SIMPLEX
+            font_scale = 1.0
+            thickness = 2
             text_size, baseline = cv2.getTextSize(label, font, font_scale, thickness)
             text_w, text_h = text_size
             pad = 4
     return output
+def draw_masks(
+    frame: np.ndarray,
+    masks: np.ndarray,
+    alpha: float = 0.45,
+    labels: Optional[Sequence[str]] = None,
+) -> np.ndarray:
     output = frame.copy()
     if masks is None or len(masks) == 0:
         return output
     for idx, mask in enumerate(masks):
         if mask is None:
             continue
             mask = cv2.resize(mask, (output.shape[1], output.shape[0]), interpolation=cv2.INTER_NEAREST)
         mask_bool = mask.astype(bool)
         overlay = np.zeros_like(output, dtype=np.uint8)
+        label = None
+        if labels and idx < len(labels):
+            label = labels[idx]
+        if not label:
+            label = f"object_{idx}"
+        color = _color_for_label(label)
+        overlay[mask_bool] = color
         output = cv2.addWeighted(output, 1.0, overlay, alpha, 0)
+        if label:
+            coords = np.column_stack(np.where(mask_bool))
+            if coords.size:
+                y, x = coords[0]
+                font = cv2.FONT_HERSHEY_SIMPLEX
+                font_scale = 1.0
+                thickness = 2
+                text_size, baseline = cv2.getTextSize(label, font, font_scale, thickness)
+                text_w, text_h = text_size
+                pad = 4
+                text_x = int(x)
+                text_y = max(int(y) - 6, text_h + pad)
+                box_top_left = (text_x, text_y - text_h - pad)
+                box_bottom_right = (text_x + text_w + pad, text_y + baseline)
+                cv2.rectangle(output, box_top_left, box_bottom_right, color, thickness=-1)
+                cv2.putText(
+                    output,
+                    label,
+                    (text_x + pad // 2, text_y - 2),
+                    font,
+                    font_scale,
+                    (255, 255, 255),
+                    thickness,
+                    lineType=cv2.LINE_AA,
+                )
     return output
     lock = _get_model_lock("segmenter", segmenter.name)
     with lock:
         result = segmenter.predict(frame, text_prompts=text_queries)
+    labels = text_queries or []
+    return draw_masks(frame, result.masks, labels=labels), result
 def extract_first_frame(video_path: str) -> Tuple[np.ndarray, float, int, int]: