Spaces:

MLBench
/

Contour_Detection_Paper

Running

App Files Files Community

mlbench123 committed on Aug 12, 2025

Commit

70843b0

verified ·

1 Parent(s): 4ce1663

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -34

app.py CHANGED Viewed

@@ -413,7 +413,7 @@ def validate_single_object(mask: np.ndarray, paper_contour: np.ndarray) -> None:
     # Filter out very small contours (noise) and paper-sized contours
     image_area = mask.shape[0] * mask.shape[1]
-    min_area = 1000  # Minimum area threshold
     max_area = image_area * 0.5  # Maximum 50% of image area (to exclude paper detection)
     significant_contours = [c for c in contours if min_area < cv2.contourArea(c) < max_area]
@@ -514,17 +514,15 @@ def remove_bg(image: np.ndarray) -> np.ndarray:
 #     return result_mask
 def mask_paper_area_in_image(image: np.ndarray, paper_contour: np.ndarray) -> np.ndarray:
-    """
-    Black out paper area in the input image before sending to BiRefNet
-    """
     masked_image = image.copy()
-    # Create more aggressive paper mask
     rect = cv2.boundingRect(paper_contour)
-    shrink_pixels = int(min(rect[2], rect[3]) * 0.08)  # 8% shrink
     x, y, w, h = rect
-    # Create mask for everything OUTSIDE the inner paper area
     outer_mask = np.ones(image.shape[:2], dtype=np.uint8) * 255
     inner_contour = np.array([
@@ -534,11 +532,8 @@ def mask_paper_area_in_image(image: np.ndarray, paper_contour: np.ndarray) -> np
         [[x + shrink_pixels, y + h - shrink_pixels]]
     ])
-    # Black out everything outside inner paper bounds
     cv2.fillPoly(outer_mask, [inner_contour], 0)
-    # Apply mask to image
-    masked_image[outer_mask == 255] = [0, 0, 0]  # Black out paper areas
     return masked_image
@@ -983,46 +978,44 @@ def predict_with_paper(image, paper_size, offset, offset_unit, finger_clearance=
         # Use YOLOWorld to detect object bounding box
         yolo_world = get_yolo_world()
         if yolo_world is None:
             logger.warning("YOLOWorld model not available, proceeding with full image")
             cropped_image = masked_input_image
-            crop_offset = (0, 0)  # No offset if not cropping
         else:
-            # Set prompts for tool/object detection
-            yolo_world.set_classes(["tool", "object on paper"])
-            results = yolo_world.predict(masked_input_image, conf=0.2, verbose=False)
             if not results or len(results) == 0 or not hasattr(results[0], 'boxes') or len(results[0].boxes) == 0:
                 logger.warning("No objects detected by YOLOWorld, proceeding with full image")
                 cropped_image = masked_input_image
                 crop_offset = (0, 0)
             else:
-                # Get the highest confidence box
                 boxes = results[0].boxes.xyxy.cpu().numpy()
                 confidences = results[0].boxes.conf.cpu().numpy()
-                best_box_idx = np.argmax(confidences)
-                x_min, y_min, x_max, y_max = map(int, boxes[best_box_idx])
-                # Add margin (e.g., 10% of box size, min 20px)
-                margin = max(20, int(min(x_max - x_min, y_max - y_min) * 0.1))
-                x_min = max(0, x_min - margin)
-                y_min = max(0, y_min - margin)
-                x_max = min(masked_input_image.shape[1], x_max + margin)
-                y_max = min(masked_input_image.shape[0], y_max + margin)
-                # Validate crop region
-                if x_max <= x_min or y_max <= y_min:
-                    logger.warning("Invalid crop region, proceeding with full image")
                     cropped_image = masked_input_image
                     crop_offset = (0, 0)
                 else:
-                    # Crop the masked image
-                    cropped_image = masked_input_image[y_min:y_max, x_min:x_max]
-                    crop_offset = (x_min, y_min)  # Store offset for mask realignment
-                    logger.info(f"Cropped to box: ({x_min}, {y_min}, {x_max}, {y_max})")
-                    # Debug: Save cropped image
-                    cv2.imwrite("./debug/cropped_image.jpg", cropped_image)
         # Remove background from cropped image
         orig_size = image.shape[:2]
@@ -1049,7 +1042,7 @@ def predict_with_paper(image, paper_size, offset, offset_unit, finger_clearance=
         # Check if we actually have object pixels after paper exclusion
         object_pixels = np.count_nonzero(objects_mask)
-        if object_pixels < 1000:  # Minimum threshold
             raise NoObjectDetectedError("No significant object detected after excluding paper area")
         # Validate single object

     # Filter out very small contours (noise) and paper-sized contours
     image_area = mask.shape[0] * mask.shape[1]
+    min_area = 100  # Minimum area threshold
     max_area = image_area * 0.5  # Maximum 50% of image area (to exclude paper detection)
     significant_contours = [c for c in contours if min_area < cv2.contourArea(c) < max_area]
 #     return result_mask
 def mask_paper_area_in_image(image: np.ndarray, paper_contour: np.ndarray) -> np.ndarray:
+    """Less aggressive masking to preserve corner objects"""
     masked_image = image.copy()
+    # Much less aggressive shrinking - only 2% instead of 8%
     rect = cv2.boundingRect(paper_contour)
+    shrink_pixels = max(5, int(min(rect[2], rect[3]) * 0.02))  # Changed from 0.08 to 0.02
     x, y, w, h = rect
+    # Create mask but keep more area
     outer_mask = np.ones(image.shape[:2], dtype=np.uint8) * 255
     inner_contour = np.array([
         [[x + shrink_pixels, y + h - shrink_pixels]]
     ])
     cv2.fillPoly(outer_mask, [inner_contour], 0)
+    masked_image[outer_mask == 255] = [128, 128, 128]  # Gray instead of black
     return masked_image
         # Use YOLOWorld to detect object bounding box
         yolo_world = get_yolo_world()
+        # Lower confidence and add size-based filtering
         if yolo_world is None:
             logger.warning("YOLOWorld model not available, proceeding with full image")
             cropped_image = masked_input_image
+            crop_offset = (0, 0)
         else:
+            yolo_world.set_classes(["small object", "tool", "item", "component", "part", "piece", "device"])
+            results = yolo_world.predict(masked_input_image, conf=0.05, verbose=False)  # Much lower confidence
             if not results or len(results) == 0 or not hasattr(results[0], 'boxes') or len(results[0].boxes) == 0:
                 logger.warning("No objects detected by YOLOWorld, proceeding with full image")
                 cropped_image = masked_input_image
                 crop_offset = (0, 0)
             else:
                 boxes = results[0].boxes.xyxy.cpu().numpy()
                 confidences = results[0].boxes.conf.cpu().numpy()
+                # Filter out boxes that are too large (likely paper detection)
+                valid_boxes = []
+                image_area = masked_input_image.shape[0] * masked_input_image.shape[1]
+                for i, box in enumerate(boxes):
+                    x_min, y_min, x_max, y_max = box
+                    box_area = (x_max - x_min) * (y_max - y_min)
+                    if box_area < image_area * 0.3:  # Reject if larger than 30% of image
+                        valid_boxes.append((i, confidences[i]))
+                if not valid_boxes:
                     cropped_image = masked_input_image
                     crop_offset = (0, 0)
                 else:
+                    # Get highest confidence valid box
+                    best_idx = max(valid_boxes, key=lambda x: x[1])[0]
+                    x_min, y_min, x_max, y_max = map(int, boxes[best_idx])
+                    # Larger margin for small objects
+                    box_size = min(x_max - x_min, y_max - y_min)
+                    margin = max(30, int(box_size * 0.3))  # At least 30px margin
         # Remove background from cropped image
         orig_size = image.shape[:2]
         # Check if we actually have object pixels after paper exclusion
         object_pixels = np.count_nonzero(objects_mask)
+        if object_pixels < 300:  # Minimum threshold
             raise NoObjectDetectedError("No significant object detected after excluding paper area")
         # Validate single object