Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -413,7 +413,7 @@ def validate_single_object(mask: np.ndarray, paper_contour: np.ndarray) -> None:
|
|
| 413 |
|
| 414 |
# Filter out very small contours (noise) and paper-sized contours
|
| 415 |
image_area = mask.shape[0] * mask.shape[1]
|
| 416 |
-
min_area =
|
| 417 |
max_area = image_area * 0.5 # Maximum 50% of image area (to exclude paper detection)
|
| 418 |
significant_contours = [c for c in contours if min_area < cv2.contourArea(c) < max_area]
|
| 419 |
|
|
@@ -514,17 +514,15 @@ def remove_bg(image: np.ndarray) -> np.ndarray:
|
|
| 514 |
|
| 515 |
# return result_mask
|
| 516 |
def mask_paper_area_in_image(image: np.ndarray, paper_contour: np.ndarray) -> np.ndarray:
|
| 517 |
-
"""
|
| 518 |
-
Black out paper area in the input image before sending to BiRefNet
|
| 519 |
-
"""
|
| 520 |
masked_image = image.copy()
|
| 521 |
|
| 522 |
-
#
|
| 523 |
rect = cv2.boundingRect(paper_contour)
|
| 524 |
-
shrink_pixels = int(min(rect[2], rect[3]) * 0.08)
|
| 525 |
|
| 526 |
x, y, w, h = rect
|
| 527 |
-
# Create mask
|
| 528 |
outer_mask = np.ones(image.shape[:2], dtype=np.uint8) * 255
|
| 529 |
|
| 530 |
inner_contour = np.array([
|
|
@@ -534,11 +532,8 @@ def mask_paper_area_in_image(image: np.ndarray, paper_contour: np.ndarray) -> np
|
|
| 534 |
[[x + shrink_pixels, y + h - shrink_pixels]]
|
| 535 |
])
|
| 536 |
|
| 537 |
-
# Black out everything outside inner paper bounds
|
| 538 |
cv2.fillPoly(outer_mask, [inner_contour], 0)
|
| 539 |
-
|
| 540 |
-
# Apply mask to image
|
| 541 |
-
masked_image[outer_mask == 255] = [0, 0, 0] # Black out paper areas
|
| 542 |
|
| 543 |
return masked_image
|
| 544 |
|
|
@@ -983,46 +978,44 @@ def predict_with_paper(image, paper_size, offset, offset_unit, finger_clearance=
|
|
| 983 |
|
| 984 |
# Use YOLOWorld to detect object bounding box
|
| 985 |
yolo_world = get_yolo_world()
|
|
|
|
| 986 |
if yolo_world is None:
|
| 987 |
logger.warning("YOLOWorld model not available, proceeding with full image")
|
| 988 |
cropped_image = masked_input_image
|
| 989 |
-
crop_offset = (0, 0)
|
| 990 |
else:
|
| 991 |
-
|
| 992 |
-
yolo_world.
|
| 993 |
-
results = yolo_world.predict(masked_input_image, conf=0.2, verbose=False)
|
| 994 |
|
| 995 |
if not results or len(results) == 0 or not hasattr(results[0], 'boxes') or len(results[0].boxes) == 0:
|
| 996 |
logger.warning("No objects detected by YOLOWorld, proceeding with full image")
|
| 997 |
cropped_image = masked_input_image
|
| 998 |
crop_offset = (0, 0)
|
| 999 |
else:
|
| 1000 |
-
# Get the highest confidence box
|
| 1001 |
boxes = results[0].boxes.xyxy.cpu().numpy()
|
| 1002 |
confidences = results[0].boxes.conf.cpu().numpy()
|
| 1003 |
-
best_box_idx = np.argmax(confidences)
|
| 1004 |
-
x_min, y_min, x_max, y_max = map(int, boxes[best_box_idx])
|
| 1005 |
|
| 1006 |
-
#
|
| 1007 |
-
|
| 1008 |
-
|
| 1009 |
-
|
| 1010 |
-
|
| 1011 |
-
|
|
|
|
|
|
|
|
|
|
| 1012 |
|
| 1013 |
-
|
| 1014 |
-
if x_max <= x_min or y_max <= y_min:
|
| 1015 |
-
logger.warning("Invalid crop region, proceeding with full image")
|
| 1016 |
cropped_image = masked_input_image
|
| 1017 |
crop_offset = (0, 0)
|
| 1018 |
else:
|
| 1019 |
-
#
|
| 1020 |
-
|
| 1021 |
-
|
| 1022 |
-
logger.info(f"Cropped to box: ({x_min}, {y_min}, {x_max}, {y_max})")
|
| 1023 |
|
| 1024 |
-
#
|
| 1025 |
-
|
|
|
|
| 1026 |
|
| 1027 |
# Remove background from cropped image
|
| 1028 |
orig_size = image.shape[:2]
|
|
@@ -1049,7 +1042,7 @@ def predict_with_paper(image, paper_size, offset, offset_unit, finger_clearance=
|
|
| 1049 |
|
| 1050 |
# Check if we actually have object pixels after paper exclusion
|
| 1051 |
object_pixels = np.count_nonzero(objects_mask)
|
| 1052 |
-
if object_pixels <
|
| 1053 |
raise NoObjectDetectedError("No significant object detected after excluding paper area")
|
| 1054 |
|
| 1055 |
# Validate single object
|
|
|
|
| 413 |
|
| 414 |
# Filter out very small contours (noise) and paper-sized contours
|
| 415 |
image_area = mask.shape[0] * mask.shape[1]
|
| 416 |
+
min_area = 100 # Minimum area threshold
|
| 417 |
max_area = image_area * 0.5 # Maximum 50% of image area (to exclude paper detection)
|
| 418 |
significant_contours = [c for c in contours if min_area < cv2.contourArea(c) < max_area]
|
| 419 |
|
|
|
|
| 514 |
|
| 515 |
# return result_mask
|
| 516 |
def mask_paper_area_in_image(image: np.ndarray, paper_contour: np.ndarray) -> np.ndarray:
|
| 517 |
+
"""Less aggressive masking to preserve corner objects"""
|
|
|
|
|
|
|
| 518 |
masked_image = image.copy()
|
| 519 |
|
| 520 |
+
# Much less aggressive shrinking - only 2% instead of 8%
|
| 521 |
rect = cv2.boundingRect(paper_contour)
|
| 522 |
+
shrink_pixels = max(5, int(min(rect[2], rect[3]) * 0.02)) # Changed from 0.08 to 0.02
|
| 523 |
|
| 524 |
x, y, w, h = rect
|
| 525 |
+
# Create mask but keep more area
|
| 526 |
outer_mask = np.ones(image.shape[:2], dtype=np.uint8) * 255
|
| 527 |
|
| 528 |
inner_contour = np.array([
|
|
|
|
| 532 |
[[x + shrink_pixels, y + h - shrink_pixels]]
|
| 533 |
])
|
| 534 |
|
|
|
|
| 535 |
cv2.fillPoly(outer_mask, [inner_contour], 0)
|
| 536 |
+
masked_image[outer_mask == 255] = [128, 128, 128] # Gray instead of black
|
|
|
|
|
|
|
| 537 |
|
| 538 |
return masked_image
|
| 539 |
|
|
|
|
| 978 |
|
| 979 |
# Use YOLOWorld to detect object bounding box
|
| 980 |
yolo_world = get_yolo_world()
|
| 981 |
+
# Lower confidence and add size-based filtering
|
| 982 |
if yolo_world is None:
|
| 983 |
logger.warning("YOLOWorld model not available, proceeding with full image")
|
| 984 |
cropped_image = masked_input_image
|
| 985 |
+
crop_offset = (0, 0)
|
| 986 |
else:
|
| 987 |
+
yolo_world.set_classes(["small object", "tool", "item", "component", "part", "piece", "device"])
|
| 988 |
+
results = yolo_world.predict(masked_input_image, conf=0.05, verbose=False) # Much lower confidence
|
|
|
|
| 989 |
|
| 990 |
if not results or len(results) == 0 or not hasattr(results[0], 'boxes') or len(results[0].boxes) == 0:
|
| 991 |
logger.warning("No objects detected by YOLOWorld, proceeding with full image")
|
| 992 |
cropped_image = masked_input_image
|
| 993 |
crop_offset = (0, 0)
|
| 994 |
else:
|
|
|
|
| 995 |
boxes = results[0].boxes.xyxy.cpu().numpy()
|
| 996 |
confidences = results[0].boxes.conf.cpu().numpy()
|
|
|
|
|
|
|
| 997 |
|
| 998 |
+
# Filter out boxes that are too large (likely paper detection)
|
| 999 |
+
valid_boxes = []
|
| 1000 |
+
image_area = masked_input_image.shape[0] * masked_input_image.shape[1]
|
| 1001 |
+
|
| 1002 |
+
for i, box in enumerate(boxes):
|
| 1003 |
+
x_min, y_min, x_max, y_max = box
|
| 1004 |
+
box_area = (x_max - x_min) * (y_max - y_min)
|
| 1005 |
+
if box_area < image_area * 0.3: # Reject if larger than 30% of image
|
| 1006 |
+
valid_boxes.append((i, confidences[i]))
|
| 1007 |
|
| 1008 |
+
if not valid_boxes:
|
|
|
|
|
|
|
| 1009 |
cropped_image = masked_input_image
|
| 1010 |
crop_offset = (0, 0)
|
| 1011 |
else:
|
| 1012 |
+
# Get highest confidence valid box
|
| 1013 |
+
best_idx = max(valid_boxes, key=lambda x: x[1])[0]
|
| 1014 |
+
x_min, y_min, x_max, y_max = map(int, boxes[best_idx])
|
|
|
|
| 1015 |
|
| 1016 |
+
# Larger margin for small objects
|
| 1017 |
+
box_size = min(x_max - x_min, y_max - y_min)
|
| 1018 |
+
margin = max(30, int(box_size * 0.3)) # At least 30px margin
|
| 1019 |
|
| 1020 |
# Remove background from cropped image
|
| 1021 |
orig_size = image.shape[:2]
|
|
|
|
| 1042 |
|
| 1043 |
# Check if we actually have object pixels after paper exclusion
|
| 1044 |
object_pixels = np.count_nonzero(objects_mask)
|
| 1045 |
+
if object_pixels < 300: # Minimum threshold
|
| 1046 |
raise NoObjectDetectedError("No significant object detected after excluding paper area")
|
| 1047 |
|
| 1048 |
# Validate single object
|