Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- measure_finger.py +127 -2
- script/validate_sam_card.py +3 -1
- src/sam_card_detection.py +48 -8
measure_finger.py
CHANGED
|
@@ -386,6 +386,81 @@ def _overlay_sam_masks(
|
|
| 386 |
return out
|
| 387 |
|
| 388 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
def _save_debug_visualization(path: str, image: np.ndarray) -> None:
|
| 390 |
"""Downscale + fast-encode a debug overlay image.
|
| 391 |
|
|
@@ -434,7 +509,9 @@ def _sam_card_detect(
|
|
| 434 |
middle_pip = landmarks[10, :2]
|
| 435 |
anchor_xy = (int(round(middle_pip[0])), int(round(middle_pip[1])))
|
| 436 |
|
| 437 |
-
|
|
|
|
|
|
|
| 438 |
if not seeds:
|
| 439 |
return None
|
| 440 |
|
|
@@ -445,12 +522,28 @@ def _sam_card_detect(
|
|
| 445 |
]
|
| 446 |
|
| 447 |
prompt_debug = str(debug_root / "sam_card_prompt_debug") if debug_root else None
|
| 448 |
-
|
| 449 |
image_canonical,
|
| 450 |
seed_points=seeds,
|
| 451 |
negative_points=negatives,
|
| 452 |
debug_dir=prompt_debug,
|
|
|
|
| 453 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 454 |
|
| 455 |
|
| 456 |
def measure_finger(
|
|
@@ -1032,6 +1125,18 @@ def measure_finger(
|
|
| 1032 |
scale_px_per_cm=px_per_cm,
|
| 1033 |
)
|
| 1034 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1035 |
# Save result image (downscaled + JPEG-encoded for speed)
|
| 1036 |
_save_debug_visualization(result_png_path, debug_image)
|
| 1037 |
print(f"Result visualization saved to: {result_png_path}")
|
|
@@ -1355,6 +1460,18 @@ def measure_multi_finger(
|
|
| 1355 |
else:
|
| 1356 |
card_result = detect_credit_card(image_canonical, debug_dir=card_debug_dir)
|
| 1357 |
if card_result is None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1358 |
return {"fail_reason": "card_not_detected", "per_finger": {}, "fingers_measured": 0, "fingers_succeeded": 0}
|
| 1359 |
px_per_cm, scale_confidence = compute_scale_factor(card_result["corners"])
|
| 1360 |
view_angle_ok = scale_confidence > 0.9
|
|
@@ -1493,6 +1610,14 @@ def _draw_multi_finger_debug(
|
|
| 1493 |
if card_result is not None:
|
| 1494 |
vis = draw_card_overlay(vis, card_result, px_per_cm)
|
| 1495 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1496 |
# Draw per-finger Sobel edge overlays
|
| 1497 |
for fn, result in per_finger_raw.items():
|
| 1498 |
internal = result.get("_internal")
|
|
|
|
| 386 |
return out
|
| 387 |
|
| 388 |
|
| 389 |
+
def _overlay_card_seeds(
|
| 390 |
+
image: np.ndarray,
|
| 391 |
+
seed_debug: Optional[Dict[str, Any]],
|
| 392 |
+
rotation_matrix: Optional[np.ndarray] = None,
|
| 393 |
+
) -> np.ndarray:
|
| 394 |
+
"""Draw SAM card-detection prompt points on a debug image.
|
| 395 |
+
|
| 396 |
+
Positive seeds in green, palm-center negative in red, middle-PIP anchor
|
| 397 |
+
as a cyan cross. All points are assumed to be in the canonical (pre-
|
| 398 |
+
precise-rotation) frame; pass ``rotation_matrix`` to align with an image
|
| 399 |
+
that had the finger rotation applied.
|
| 400 |
+
"""
|
| 401 |
+
if not seed_debug:
|
| 402 |
+
return image
|
| 403 |
+
from src.geometry import transform_points_rotation
|
| 404 |
+
|
| 405 |
+
def _xform(points: List[Tuple[int, int]]) -> np.ndarray:
|
| 406 |
+
arr = np.asarray(points, dtype=np.float32)
|
| 407 |
+
if rotation_matrix is not None and len(arr) > 0:
|
| 408 |
+
arr = transform_points_rotation(arr, rotation_matrix)
|
| 409 |
+
return arr
|
| 410 |
+
|
| 411 |
+
out = image
|
| 412 |
+
h, w = out.shape[:2]
|
| 413 |
+
# Thin, map-style reference crosses: small, single-pixel-wide, softened.
|
| 414 |
+
marker_size = max(14, int(round(0.012 * max(h, w))))
|
| 415 |
+
thickness = 2
|
| 416 |
+
|
| 417 |
+
# Pastel variants so the markers read as reference lines rather than
|
| 418 |
+
# high-contrast callouts.
|
| 419 |
+
DROP_COLOR = (120, 220, 220) # soft yellow
|
| 420 |
+
KEEP_COLOR = (120, 220, 120) # soft green
|
| 421 |
+
NEG_COLOR = (120, 120, 220) # soft red
|
| 422 |
+
ANCHOR_COLOR = (220, 220, 120) # soft cyan
|
| 423 |
+
|
| 424 |
+
def _plus(pt, color):
|
| 425 |
+
cv2.drawMarker(
|
| 426 |
+
out, (int(pt[0]), int(pt[1])), color,
|
| 427 |
+
markerType=cv2.MARKER_CROSS,
|
| 428 |
+
markerSize=marker_size,
|
| 429 |
+
thickness=thickness,
|
| 430 |
+
line_type=cv2.LINE_AA,
|
| 431 |
+
)
|
| 432 |
+
|
| 433 |
+
# Seeds dropped by the hand-mask filter. Drawn first so any coincident
|
| 434 |
+
# kept seed paints on top.
|
| 435 |
+
dropped = seed_debug.get("dropped") or []
|
| 436 |
+
if dropped:
|
| 437 |
+
for pt in _xform(dropped).astype(int):
|
| 438 |
+
_plus(pt, DROP_COLOR)
|
| 439 |
+
|
| 440 |
+
seeds = seed_debug.get("seeds") or []
|
| 441 |
+
if seeds:
|
| 442 |
+
for pt in _xform(seeds).astype(int):
|
| 443 |
+
_plus(pt, KEEP_COLOR)
|
| 444 |
+
|
| 445 |
+
negatives = seed_debug.get("negatives") or []
|
| 446 |
+
if negatives:
|
| 447 |
+
for pt in _xform(negatives).astype(int):
|
| 448 |
+
_plus(pt, NEG_COLOR)
|
| 449 |
+
|
| 450 |
+
anchor = seed_debug.get("anchor")
|
| 451 |
+
if anchor is not None:
|
| 452 |
+
ax, ay = _xform([anchor])[0].astype(int)
|
| 453 |
+
# Tilted cross (X) to distinguish the anchor from the plus-shaped seeds.
|
| 454 |
+
cv2.drawMarker(
|
| 455 |
+
out, (int(ax), int(ay)), ANCHOR_COLOR,
|
| 456 |
+
markerType=cv2.MARKER_TILTED_CROSS,
|
| 457 |
+
markerSize=marker_size,
|
| 458 |
+
thickness=thickness,
|
| 459 |
+
line_type=cv2.LINE_AA,
|
| 460 |
+
)
|
| 461 |
+
return out
|
| 462 |
+
|
| 463 |
+
|
| 464 |
def _save_debug_visualization(path: str, image: np.ndarray) -> None:
|
| 465 |
"""Downscale + fast-encode a debug overlay image.
|
| 466 |
|
|
|
|
| 509 |
middle_pip = landmarks[10, :2]
|
| 510 |
anchor_xy = (int(round(middle_pip[0])), int(round(middle_pip[1])))
|
| 511 |
|
| 512 |
+
seed_info = suggest_card_seeds(hand_mask, image_canonical.shape[:2], anchor_xy)
|
| 513 |
+
seeds = seed_info["kept"]
|
| 514 |
+
dropped_seeds = seed_info["dropped"]
|
| 515 |
if not seeds:
|
| 516 |
return None
|
| 517 |
|
|
|
|
| 522 |
]
|
| 523 |
|
| 524 |
prompt_debug = str(debug_root / "sam_card_prompt_debug") if debug_root else None
|
| 525 |
+
card_result = detect_credit_card_sam_prompt(
|
| 526 |
image_canonical,
|
| 527 |
seed_points=seeds,
|
| 528 |
negative_points=negatives,
|
| 529 |
debug_dir=prompt_debug,
|
| 530 |
+
hand_mask=hand_mask,
|
| 531 |
)
|
| 532 |
+
# Stash seed geometry so the final result PNG can visualize what was
|
| 533 |
+
# prompted into SAM, even when card detection fails.
|
| 534 |
+
seed_debug = {
|
| 535 |
+
"anchor": anchor_xy,
|
| 536 |
+
"seeds": list(seeds),
|
| 537 |
+
"dropped": list(dropped_seeds),
|
| 538 |
+
"negatives": list(negatives),
|
| 539 |
+
}
|
| 540 |
+
if card_result is not None:
|
| 541 |
+
card_result["seed_debug"] = seed_debug
|
| 542 |
+
else:
|
| 543 |
+
# Return a sentinel-less None as before, but tuck seeds where the
|
| 544 |
+
# caller can still find them via hand_data for the failure overlay.
|
| 545 |
+
hand_data["_sam_card_seed_debug"] = seed_debug
|
| 546 |
+
return card_result
|
| 547 |
|
| 548 |
|
| 549 |
def measure_finger(
|
|
|
|
| 1125 |
scale_px_per_cm=px_per_cm,
|
| 1126 |
)
|
| 1127 |
|
| 1128 |
+
# SAM card-detection seed points (prompt geometry). Falls back to
|
| 1129 |
+
# the hand_data slot when card detection returned None so we can
|
| 1130 |
+
# still see what was prompted into SAM.
|
| 1131 |
+
seed_debug = None
|
| 1132 |
+
if card_result is not None:
|
| 1133 |
+
seed_debug = card_result.get("seed_debug")
|
| 1134 |
+
if seed_debug is None and hand_data is not None:
|
| 1135 |
+
seed_debug = hand_data.get("_sam_card_seed_debug")
|
| 1136 |
+
debug_image = _overlay_card_seeds(
|
| 1137 |
+
debug_image, seed_debug, rotation_matrix=rotation_matrix
|
| 1138 |
+
)
|
| 1139 |
+
|
| 1140 |
# Save result image (downscaled + JPEG-encoded for speed)
|
| 1141 |
_save_debug_visualization(result_png_path, debug_image)
|
| 1142 |
print(f"Result visualization saved to: {result_png_path}")
|
|
|
|
| 1460 |
else:
|
| 1461 |
card_result = detect_credit_card(image_canonical, debug_dir=card_debug_dir)
|
| 1462 |
if card_result is None:
|
| 1463 |
+
# Emit a diagnostic visualization so the failure is debuggable:
|
| 1464 |
+
# hand mask + card-prompt seeds on the canonical image. Without
|
| 1465 |
+
# this, a card_not_detected failure on HF leaves no PNG to pull.
|
| 1466 |
+
if result_png_path is not None:
|
| 1467 |
+
vis = image_canonical.copy()
|
| 1468 |
+
vis = _overlay_sam_masks(vis, hand_mask=hand_data.get("mask"))
|
| 1469 |
+
vis = _overlay_hand_skeleton(vis, landmarks=hand_data.get("landmarks"))
|
| 1470 |
+
vis = _overlay_card_seeds(
|
| 1471 |
+
vis, hand_data.get("_sam_card_seed_debug")
|
| 1472 |
+
)
|
| 1473 |
+
_save_debug_visualization(result_png_path, vis)
|
| 1474 |
+
print(f"[multi] Card-not-detected viz saved to: {result_png_path}")
|
| 1475 |
return {"fail_reason": "card_not_detected", "per_finger": {}, "fingers_measured": 0, "fingers_succeeded": 0}
|
| 1476 |
px_per_cm, scale_confidence = compute_scale_factor(card_result["corners"])
|
| 1477 |
view_angle_ok = scale_confidence > 0.9
|
|
|
|
| 1610 |
if card_result is not None:
|
| 1611 |
vis = draw_card_overlay(vis, card_result, px_per_cm)
|
| 1612 |
|
| 1613 |
+
# SAM card-detection seed points (prompt geometry). Multi-finger viz
|
| 1614 |
+
# runs in the canonical frame with no precise-rotation applied, so no
|
| 1615 |
+
# rotation matrix is needed here.
|
| 1616 |
+
seed_debug = None
|
| 1617 |
+
if card_result is not None:
|
| 1618 |
+
seed_debug = card_result.get("seed_debug")
|
| 1619 |
+
vis = _overlay_card_seeds(vis, seed_debug)
|
| 1620 |
+
|
| 1621 |
# Draw per-finger Sobel edge overlays
|
| 1622 |
for fn, result in per_finger_raw.items():
|
| 1623 |
internal = result.get("_internal")
|
script/validate_sam_card.py
CHANGED
|
@@ -102,7 +102,8 @@ def run_one(img_path: Path) -> dict:
|
|
| 102 |
return rec
|
| 103 |
mp = landmarks[10, :2]
|
| 104 |
anchor_xy = (int(round(mp[0])), int(round(mp[1])))
|
| 105 |
-
|
|
|
|
| 106 |
rec["prompt_n_seeds"] = len(seeds)
|
| 107 |
negs = _negatives_from_landmarks(hand_data["landmarks"])
|
| 108 |
t0 = time.time()
|
|
@@ -112,6 +113,7 @@ def run_one(img_path: Path) -> dict:
|
|
| 112 |
seed_points=seeds,
|
| 113 |
negative_points=negs,
|
| 114 |
debug_dir=str(prompt_debug),
|
|
|
|
| 115 |
)
|
| 116 |
if pr is not None:
|
| 117 |
px_cm, _ = compute_scale_factor(pr["corners"])
|
|
|
|
| 102 |
return rec
|
| 103 |
mp = landmarks[10, :2]
|
| 104 |
anchor_xy = (int(round(mp[0])), int(round(mp[1])))
|
| 105 |
+
seed_info = suggest_card_seeds(hand_data["mask"], canonical.shape[:2], anchor_xy)
|
| 106 |
+
seeds = seed_info["kept"]
|
| 107 |
rec["prompt_n_seeds"] = len(seeds)
|
| 108 |
negs = _negatives_from_landmarks(hand_data["landmarks"])
|
| 109 |
t0 = time.time()
|
|
|
|
| 113 |
seed_points=seeds,
|
| 114 |
negative_points=negs,
|
| 115 |
debug_dir=str(prompt_debug),
|
| 116 |
+
hand_mask=hand_data["mask"],
|
| 117 |
)
|
| 118 |
if pr is not None:
|
| 119 |
px_cm, _ = compute_scale_factor(pr["corners"])
|
src/sam_card_detection.py
CHANGED
|
@@ -43,6 +43,7 @@ POINTS_PER_BATCH = 64
|
|
| 43 |
# Candidate filtering
|
| 44 |
MIN_RECTANGULARITY = 0.90 # mask_area / minAreaRect_area; card mask is near-perfect rectangle
|
| 45 |
ASPECT_RATIO_TOLERANCE = 0.15 # fractional deviation from 1.586
|
|
|
|
| 46 |
|
| 47 |
_pipeline = None # lazy singleton
|
| 48 |
|
|
@@ -113,6 +114,7 @@ def _mask_to_bool_array(mask: Any, target_shape: Optional[Tuple[int, int]] = Non
|
|
| 113 |
def _score_card_mask(
|
| 114 |
mask: np.ndarray,
|
| 115 |
image_area: float,
|
|
|
|
| 116 |
) -> Optional[Dict[str, Any]]:
|
| 117 |
"""Score a candidate mask for being a credit card.
|
| 118 |
|
|
@@ -142,6 +144,22 @@ def _score_card_mask(
|
|
| 142 |
if contour_area <= 0:
|
| 143 |
return None
|
| 144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
rect = cv2.minAreaRect(contour)
|
| 146 |
box = cv2.boxPoints(rect)
|
| 147 |
rect_area = cv2.contourArea(box.astype(np.float32))
|
|
@@ -323,14 +341,18 @@ def suggest_card_seeds(
|
|
| 323 |
hand_mask: np.ndarray,
|
| 324 |
image_shape: Tuple[int, int],
|
| 325 |
anchor_xy: Tuple[int, int],
|
| 326 |
-
) -> List[Tuple[int, int]]:
|
| 327 |
"""Cross-shaped seed points through the anchor (middle-finger PIP).
|
| 328 |
|
| 329 |
Users place the credit card either above/below or left/right of the
|
| 330 |
middle-finger PIP, so a rake along the horizontal and vertical lines
|
| 331 |
through PIP catches it with far fewer prompts than a dense grid.
|
| 332 |
-
5 points per arm (step 0.15·dim), shared center deduped
|
| 333 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
"""
|
| 335 |
h, w = image_shape
|
| 336 |
mask_bool = hand_mask.astype(bool) if hand_mask.dtype != bool else hand_mask
|
|
@@ -346,16 +368,20 @@ def suggest_card_seeds(
|
|
| 346 |
for k in (-2, -1, 1, 2):
|
| 347 |
candidates.append((ax, ay + k * dy))
|
| 348 |
|
| 349 |
-
|
|
|
|
| 350 |
seen: set = set()
|
| 351 |
for px, py in candidates:
|
| 352 |
px = max(0, min(w - 1, px))
|
| 353 |
py = max(0, min(h - 1, py))
|
| 354 |
-
if (px, py) in seen
|
| 355 |
continue
|
| 356 |
seen.add((px, py))
|
| 357 |
-
|
| 358 |
-
|
|
|
|
|
|
|
|
|
|
| 359 |
|
| 360 |
|
| 361 |
def _downscale_prompt(image_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
|
|
@@ -446,6 +472,7 @@ def detect_credit_card_sam_prompt(
|
|
| 446 |
seed_points: List[Tuple[int, int]],
|
| 447 |
negative_points: Optional[List[Tuple[int, int]]] = None,
|
| 448 |
debug_dir: Optional[str] = None,
|
|
|
|
| 449 |
) -> Optional[Dict[str, Any]]:
|
| 450 |
"""Prompt-based SAM 2.1 credit card detection.
|
| 451 |
|
|
@@ -528,13 +555,26 @@ def detect_credit_card_sam_prompt(
|
|
| 528 |
iou_scores = outputs.iou_scores.cpu().numpy()[0]
|
| 529 |
infer_time = time.time() - t0
|
| 530 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 531 |
scored: List[Dict[str, Any]] = []
|
| 532 |
scaled_candidate_masks: List[np.ndarray] = []
|
| 533 |
for prompt_idx in range(masks_tensor.shape[0]):
|
| 534 |
for cand_idx in range(masks_tensor.shape[1]):
|
| 535 |
mask_scaled = masks_tensor[prompt_idx, cand_idx].numpy().astype(bool)
|
| 536 |
scaled_candidate_masks.append(mask_scaled)
|
| 537 |
-
result = _score_card_mask(mask_scaled, scaled_area)
|
| 538 |
if result is not None:
|
| 539 |
result["seed_idx"] = prompt_idx
|
| 540 |
result["cand_idx"] = cand_idx
|
|
|
|
| 43 |
# Candidate filtering
|
| 44 |
MIN_RECTANGULARITY = 0.90 # mask_area / minAreaRect_area; card mask is near-perfect rectangle
|
| 45 |
ASPECT_RATIO_TOLERANCE = 0.15 # fractional deviation from 1.586
|
| 46 |
+
MAX_HAND_OVERLAP_RATIO = 0.20 # reject candidates that swallow the hand (background paper, tabletop)
|
| 47 |
|
| 48 |
_pipeline = None # lazy singleton
|
| 49 |
|
|
|
|
| 114 |
def _score_card_mask(
|
| 115 |
mask: np.ndarray,
|
| 116 |
image_area: float,
|
| 117 |
+
hand_mask: Optional[np.ndarray] = None,
|
| 118 |
) -> Optional[Dict[str, Any]]:
|
| 119 |
"""Score a candidate mask for being a credit card.
|
| 120 |
|
|
|
|
| 144 |
if contour_area <= 0:
|
| 145 |
return None
|
| 146 |
|
| 147 |
+
# Reject candidates whose convex hull engulfs the hand. When SAM is
|
| 148 |
+
# prompted to segment the background paper, it returns the paper mask
|
| 149 |
+
# with the hand carved *out* of it — so raw AND(mask, hand) is ~0
|
| 150 |
+
# even though the hand sits visually on top of the paper. The convex
|
| 151 |
+
# hull closes that hand-shaped hole, exposing the engulfment.
|
| 152 |
+
if hand_mask is not None and mask.shape == hand_mask.shape:
|
| 153 |
+
hand_bool = hand_mask.astype(bool) if hand_mask.dtype != bool else hand_mask
|
| 154 |
+
hand_area = float(hand_bool.sum())
|
| 155 |
+
if hand_area > 0:
|
| 156 |
+
hull_mask = np.zeros(mask.shape, dtype=np.uint8)
|
| 157 |
+
cv2.fillPoly(hull_mask, [contour.astype(np.int32)], 255)
|
| 158 |
+
hull_bool = hull_mask.astype(bool)
|
| 159 |
+
overlap = float(np.logical_and(hull_bool, hand_bool).sum())
|
| 160 |
+
if overlap / hand_area > MAX_HAND_OVERLAP_RATIO:
|
| 161 |
+
return None
|
| 162 |
+
|
| 163 |
rect = cv2.minAreaRect(contour)
|
| 164 |
box = cv2.boxPoints(rect)
|
| 165 |
rect_area = cv2.contourArea(box.astype(np.float32))
|
|
|
|
| 341 |
hand_mask: np.ndarray,
|
| 342 |
image_shape: Tuple[int, int],
|
| 343 |
anchor_xy: Tuple[int, int],
|
| 344 |
+
) -> Dict[str, List[Tuple[int, int]]]:
|
| 345 |
"""Cross-shaped seed points through the anchor (middle-finger PIP).
|
| 346 |
|
| 347 |
Users place the credit card either above/below or left/right of the
|
| 348 |
middle-finger PIP, so a rake along the horizontal and vertical lines
|
| 349 |
through PIP catches it with far fewer prompts than a dense grid.
|
| 350 |
+
5 points per arm (step 0.15·dim), shared center deduped.
|
| 351 |
+
|
| 352 |
+
Returns a dict with two lists:
|
| 353 |
+
- "kept": seeds that passed the hand-mask filter (sent to SAM).
|
| 354 |
+
- "dropped": seeds whose (x, y) landed inside the hand mask and
|
| 355 |
+
were filtered out. Retained purely for debug visualization.
|
| 356 |
"""
|
| 357 |
h, w = image_shape
|
| 358 |
mask_bool = hand_mask.astype(bool) if hand_mask.dtype != bool else hand_mask
|
|
|
|
| 368 |
for k in (-2, -1, 1, 2):
|
| 369 |
candidates.append((ax, ay + k * dy))
|
| 370 |
|
| 371 |
+
kept: List[Tuple[int, int]] = []
|
| 372 |
+
dropped: List[Tuple[int, int]] = []
|
| 373 |
seen: set = set()
|
| 374 |
for px, py in candidates:
|
| 375 |
px = max(0, min(w - 1, px))
|
| 376 |
py = max(0, min(h - 1, py))
|
| 377 |
+
if (px, py) in seen:
|
| 378 |
continue
|
| 379 |
seen.add((px, py))
|
| 380 |
+
if mask_bool[py, px]:
|
| 381 |
+
dropped.append((px, py))
|
| 382 |
+
else:
|
| 383 |
+
kept.append((px, py))
|
| 384 |
+
return {"kept": kept, "dropped": dropped}
|
| 385 |
|
| 386 |
|
| 387 |
def _downscale_prompt(image_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
|
|
|
|
| 472 |
seed_points: List[Tuple[int, int]],
|
| 473 |
negative_points: Optional[List[Tuple[int, int]]] = None,
|
| 474 |
debug_dir: Optional[str] = None,
|
| 475 |
+
hand_mask: Optional[np.ndarray] = None,
|
| 476 |
) -> Optional[Dict[str, Any]]:
|
| 477 |
"""Prompt-based SAM 2.1 credit card detection.
|
| 478 |
|
|
|
|
| 555 |
iou_scores = outputs.iou_scores.cpu().numpy()[0]
|
| 556 |
infer_time = time.time() - t0
|
| 557 |
|
| 558 |
+
# Resize the hand mask into the same scaled 1024-space the candidate
|
| 559 |
+
# masks live in, so overlap rejection works without upscaling every
|
| 560 |
+
# candidate to full resolution.
|
| 561 |
+
hand_mask_scaled: Optional[np.ndarray] = None
|
| 562 |
+
if hand_mask is not None:
|
| 563 |
+
hand_u8 = (hand_mask.astype(bool).astype(np.uint8) * 255)
|
| 564 |
+
if hand_u8.shape != (scaled_h, scaled_w):
|
| 565 |
+
hand_u8 = cv2.resize(
|
| 566 |
+
hand_u8, (scaled_w, scaled_h),
|
| 567 |
+
interpolation=cv2.INTER_NEAREST,
|
| 568 |
+
)
|
| 569 |
+
hand_mask_scaled = hand_u8.astype(bool)
|
| 570 |
+
|
| 571 |
scored: List[Dict[str, Any]] = []
|
| 572 |
scaled_candidate_masks: List[np.ndarray] = []
|
| 573 |
for prompt_idx in range(masks_tensor.shape[0]):
|
| 574 |
for cand_idx in range(masks_tensor.shape[1]):
|
| 575 |
mask_scaled = masks_tensor[prompt_idx, cand_idx].numpy().astype(bool)
|
| 576 |
scaled_candidate_masks.append(mask_scaled)
|
| 577 |
+
result = _score_card_mask(mask_scaled, scaled_area, hand_mask=hand_mask_scaled)
|
| 578 |
if result is not None:
|
| 579 |
result["seed_idx"] = prompt_idx
|
| 580 |
result["cand_idx"] = cand_idx
|