feng-x committed on
Commit
e17df6f
·
verified ·
1 Parent(s): 232f909

Upload folder using huggingface_hub

Browse files
measure_finger.py CHANGED
@@ -386,6 +386,81 @@ def _overlay_sam_masks(
386
  return out
387
 
388
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
389
  def _save_debug_visualization(path: str, image: np.ndarray) -> None:
390
  """Downscale + fast-encode a debug overlay image.
391
 
@@ -434,7 +509,9 @@ def _sam_card_detect(
434
  middle_pip = landmarks[10, :2]
435
  anchor_xy = (int(round(middle_pip[0])), int(round(middle_pip[1])))
436
 
437
- seeds = suggest_card_seeds(hand_mask, image_canonical.shape[:2], anchor_xy)
 
 
438
  if not seeds:
439
  return None
440
 
@@ -445,12 +522,28 @@ def _sam_card_detect(
445
  ]
446
 
447
  prompt_debug = str(debug_root / "sam_card_prompt_debug") if debug_root else None
448
- return detect_credit_card_sam_prompt(
449
  image_canonical,
450
  seed_points=seeds,
451
  negative_points=negatives,
452
  debug_dir=prompt_debug,
 
453
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
454
 
455
 
456
  def measure_finger(
@@ -1032,6 +1125,18 @@ def measure_finger(
1032
  scale_px_per_cm=px_per_cm,
1033
  )
1034
 
 
 
 
 
 
 
 
 
 
 
 
 
1035
  # Save result image (downscaled + JPEG-encoded for speed)
1036
  _save_debug_visualization(result_png_path, debug_image)
1037
  print(f"Result visualization saved to: {result_png_path}")
@@ -1355,6 +1460,18 @@ def measure_multi_finger(
1355
  else:
1356
  card_result = detect_credit_card(image_canonical, debug_dir=card_debug_dir)
1357
  if card_result is None:
 
 
 
 
 
 
 
 
 
 
 
 
1358
  return {"fail_reason": "card_not_detected", "per_finger": {}, "fingers_measured": 0, "fingers_succeeded": 0}
1359
  px_per_cm, scale_confidence = compute_scale_factor(card_result["corners"])
1360
  view_angle_ok = scale_confidence > 0.9
@@ -1493,6 +1610,14 @@ def _draw_multi_finger_debug(
1493
  if card_result is not None:
1494
  vis = draw_card_overlay(vis, card_result, px_per_cm)
1495
 
 
 
 
 
 
 
 
 
1496
  # Draw per-finger Sobel edge overlays
1497
  for fn, result in per_finger_raw.items():
1498
  internal = result.get("_internal")
 
386
  return out
387
 
388
 
389
+ def _overlay_card_seeds(
390
+ image: np.ndarray,
391
+ seed_debug: Optional[Dict[str, Any]],
392
+ rotation_matrix: Optional[np.ndarray] = None,
393
+ ) -> np.ndarray:
394
+ """Draw SAM card-detection prompt points on a debug image.
395
+
396
+ Positive seeds in green, palm-center negative in red, middle-PIP anchor
397
+ as a cyan cross. All points are assumed to be in the canonical (pre-
398
+ precise-rotation) frame; pass ``rotation_matrix`` to align with an image
399
+ that had the finger rotation applied.
400
+ """
401
+ if not seed_debug:
402
+ return image
403
+ from src.geometry import transform_points_rotation
404
+
405
+ def _xform(points: List[Tuple[int, int]]) -> np.ndarray:
406
+ arr = np.asarray(points, dtype=np.float32)
407
+ if rotation_matrix is not None and len(arr) > 0:
408
+ arr = transform_points_rotation(arr, rotation_matrix)
409
+ return arr
410
+
411
+ out = image
412
+ h, w = out.shape[:2]
413
+ # Thin, map-style reference crosses: small, single-pixel-wide, softened.
414
+ marker_size = max(14, int(round(0.012 * max(h, w))))
415
+ thickness = 2
416
+
417
+ # Pastel variants so the markers read as reference lines rather than
418
+ # high-contrast callouts.
419
+ DROP_COLOR = (120, 220, 220) # soft yellow
420
+ KEEP_COLOR = (120, 220, 120) # soft green
421
+ NEG_COLOR = (120, 120, 220) # soft red
422
+ ANCHOR_COLOR = (220, 220, 120) # soft cyan
423
+
424
+ def _plus(pt, color):
425
+ cv2.drawMarker(
426
+ out, (int(pt[0]), int(pt[1])), color,
427
+ markerType=cv2.MARKER_CROSS,
428
+ markerSize=marker_size,
429
+ thickness=thickness,
430
+ line_type=cv2.LINE_AA,
431
+ )
432
+
433
+ # Seeds dropped by the hand-mask filter. Drawn first so any coincident
434
+ # kept seed paints on top.
435
+ dropped = seed_debug.get("dropped") or []
436
+ if dropped:
437
+ for pt in _xform(dropped).astype(int):
438
+ _plus(pt, DROP_COLOR)
439
+
440
+ seeds = seed_debug.get("seeds") or []
441
+ if seeds:
442
+ for pt in _xform(seeds).astype(int):
443
+ _plus(pt, KEEP_COLOR)
444
+
445
+ negatives = seed_debug.get("negatives") or []
446
+ if negatives:
447
+ for pt in _xform(negatives).astype(int):
448
+ _plus(pt, NEG_COLOR)
449
+
450
+ anchor = seed_debug.get("anchor")
451
+ if anchor is not None:
452
+ ax, ay = _xform([anchor])[0].astype(int)
453
+ # Tilted cross (X) to distinguish the anchor from the plus-shaped seeds.
454
+ cv2.drawMarker(
455
+ out, (int(ax), int(ay)), ANCHOR_COLOR,
456
+ markerType=cv2.MARKER_TILTED_CROSS,
457
+ markerSize=marker_size,
458
+ thickness=thickness,
459
+ line_type=cv2.LINE_AA,
460
+ )
461
+ return out
462
+
463
+
464
  def _save_debug_visualization(path: str, image: np.ndarray) -> None:
465
  """Downscale + fast-encode a debug overlay image.
466
 
 
509
  middle_pip = landmarks[10, :2]
510
  anchor_xy = (int(round(middle_pip[0])), int(round(middle_pip[1])))
511
 
512
+ seed_info = suggest_card_seeds(hand_mask, image_canonical.shape[:2], anchor_xy)
513
+ seeds = seed_info["kept"]
514
+ dropped_seeds = seed_info["dropped"]
515
  if not seeds:
516
  return None
517
 
 
522
  ]
523
 
524
  prompt_debug = str(debug_root / "sam_card_prompt_debug") if debug_root else None
525
+ card_result = detect_credit_card_sam_prompt(
526
  image_canonical,
527
  seed_points=seeds,
528
  negative_points=negatives,
529
  debug_dir=prompt_debug,
530
+ hand_mask=hand_mask,
531
  )
532
+ # Stash seed geometry so the final result PNG can visualize what was
533
+ # prompted into SAM, even when card detection fails.
534
+ seed_debug = {
535
+ "anchor": anchor_xy,
536
+ "seeds": list(seeds),
537
+ "dropped": list(dropped_seeds),
538
+ "negatives": list(negatives),
539
+ }
540
+ if card_result is not None:
541
+ card_result["seed_debug"] = seed_debug
542
+ else:
543
+ # Return a sentinel-less None as before, but tuck seeds where the
544
+ # caller can still find them via hand_data for the failure overlay.
545
+ hand_data["_sam_card_seed_debug"] = seed_debug
546
+ return card_result
547
 
548
 
549
  def measure_finger(
 
1125
  scale_px_per_cm=px_per_cm,
1126
  )
1127
 
1128
+ # SAM card-detection seed points (prompt geometry). Falls back to
1129
+ # the hand_data slot when card detection returned None so we can
1130
+ # still see what was prompted into SAM.
1131
+ seed_debug = None
1132
+ if card_result is not None:
1133
+ seed_debug = card_result.get("seed_debug")
1134
+ if seed_debug is None and hand_data is not None:
1135
+ seed_debug = hand_data.get("_sam_card_seed_debug")
1136
+ debug_image = _overlay_card_seeds(
1137
+ debug_image, seed_debug, rotation_matrix=rotation_matrix
1138
+ )
1139
+
1140
  # Save result image (downscaled + JPEG-encoded for speed)
1141
  _save_debug_visualization(result_png_path, debug_image)
1142
  print(f"Result visualization saved to: {result_png_path}")
 
1460
  else:
1461
  card_result = detect_credit_card(image_canonical, debug_dir=card_debug_dir)
1462
  if card_result is None:
1463
+ # Emit a diagnostic visualization so the failure is debuggable:
1464
+ # hand mask + card-prompt seeds on the canonical image. Without
1465
+ # this, a card_not_detected failure on HF leaves no PNG to pull.
1466
+ if result_png_path is not None:
1467
+ vis = image_canonical.copy()
1468
+ vis = _overlay_sam_masks(vis, hand_mask=hand_data.get("mask"))
1469
+ vis = _overlay_hand_skeleton(vis, landmarks=hand_data.get("landmarks"))
1470
+ vis = _overlay_card_seeds(
1471
+ vis, hand_data.get("_sam_card_seed_debug")
1472
+ )
1473
+ _save_debug_visualization(result_png_path, vis)
1474
+ print(f"[multi] Card-not-detected viz saved to: {result_png_path}")
1475
  return {"fail_reason": "card_not_detected", "per_finger": {}, "fingers_measured": 0, "fingers_succeeded": 0}
1476
  px_per_cm, scale_confidence = compute_scale_factor(card_result["corners"])
1477
  view_angle_ok = scale_confidence > 0.9
 
1610
  if card_result is not None:
1611
  vis = draw_card_overlay(vis, card_result, px_per_cm)
1612
 
1613
+ # SAM card-detection seed points (prompt geometry). Multi-finger viz
1614
+ # runs in the canonical frame with no precise-rotation applied, so no
1615
+ # rotation matrix is needed here.
1616
+ seed_debug = None
1617
+ if card_result is not None:
1618
+ seed_debug = card_result.get("seed_debug")
1619
+ vis = _overlay_card_seeds(vis, seed_debug)
1620
+
1621
  # Draw per-finger Sobel edge overlays
1622
  for fn, result in per_finger_raw.items():
1623
  internal = result.get("_internal")
script/validate_sam_card.py CHANGED
@@ -102,7 +102,8 @@ def run_one(img_path: Path) -> dict:
102
  return rec
103
  mp = landmarks[10, :2]
104
  anchor_xy = (int(round(mp[0])), int(round(mp[1])))
105
- seeds = suggest_card_seeds(hand_data["mask"], canonical.shape[:2], anchor_xy)
 
106
  rec["prompt_n_seeds"] = len(seeds)
107
  negs = _negatives_from_landmarks(hand_data["landmarks"])
108
  t0 = time.time()
@@ -112,6 +113,7 @@ def run_one(img_path: Path) -> dict:
112
  seed_points=seeds,
113
  negative_points=negs,
114
  debug_dir=str(prompt_debug),
 
115
  )
116
  if pr is not None:
117
  px_cm, _ = compute_scale_factor(pr["corners"])
 
102
  return rec
103
  mp = landmarks[10, :2]
104
  anchor_xy = (int(round(mp[0])), int(round(mp[1])))
105
+ seed_info = suggest_card_seeds(hand_data["mask"], canonical.shape[:2], anchor_xy)
106
+ seeds = seed_info["kept"]
107
  rec["prompt_n_seeds"] = len(seeds)
108
  negs = _negatives_from_landmarks(hand_data["landmarks"])
109
  t0 = time.time()
 
113
  seed_points=seeds,
114
  negative_points=negs,
115
  debug_dir=str(prompt_debug),
116
+ hand_mask=hand_data["mask"],
117
  )
118
  if pr is not None:
119
  px_cm, _ = compute_scale_factor(pr["corners"])
src/sam_card_detection.py CHANGED
@@ -43,6 +43,7 @@ POINTS_PER_BATCH = 64
43
  # Candidate filtering
44
  MIN_RECTANGULARITY = 0.90 # mask_area / minAreaRect_area; card mask is near-perfect rectangle
45
  ASPECT_RATIO_TOLERANCE = 0.15 # fractional deviation from 1.586
 
46
 
47
  _pipeline = None # lazy singleton
48
 
@@ -113,6 +114,7 @@ def _mask_to_bool_array(mask: Any, target_shape: Optional[Tuple[int, int]] = Non
113
  def _score_card_mask(
114
  mask: np.ndarray,
115
  image_area: float,
 
116
  ) -> Optional[Dict[str, Any]]:
117
  """Score a candidate mask for being a credit card.
118
 
@@ -142,6 +144,22 @@ def _score_card_mask(
142
  if contour_area <= 0:
143
  return None
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  rect = cv2.minAreaRect(contour)
146
  box = cv2.boxPoints(rect)
147
  rect_area = cv2.contourArea(box.astype(np.float32))
@@ -323,14 +341,18 @@ def suggest_card_seeds(
323
  hand_mask: np.ndarray,
324
  image_shape: Tuple[int, int],
325
  anchor_xy: Tuple[int, int],
326
- ) -> List[Tuple[int, int]]:
327
  """Cross-shaped seed points through the anchor (middle-finger PIP).
328
 
329
  Users place the credit card either above/below or left/right of the
330
  middle-finger PIP, so a rake along the horizontal and vertical lines
331
  through PIP catches it with far fewer prompts than a dense grid.
332
- 5 points per arm (step 0.15·dim), shared center deduped, hand-mask
333
- points dropped.
 
 
 
 
334
  """
335
  h, w = image_shape
336
  mask_bool = hand_mask.astype(bool) if hand_mask.dtype != bool else hand_mask
@@ -346,16 +368,20 @@ def suggest_card_seeds(
346
  for k in (-2, -1, 1, 2):
347
  candidates.append((ax, ay + k * dy))
348
 
349
- seeds: List[Tuple[int, int]] = []
 
350
  seen: set = set()
351
  for px, py in candidates:
352
  px = max(0, min(w - 1, px))
353
  py = max(0, min(h - 1, py))
354
- if (px, py) in seen or mask_bool[py, px]:
355
  continue
356
  seen.add((px, py))
357
- seeds.append((px, py))
358
- return seeds
 
 
 
359
 
360
 
361
  def _downscale_prompt(image_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
@@ -446,6 +472,7 @@ def detect_credit_card_sam_prompt(
446
  seed_points: List[Tuple[int, int]],
447
  negative_points: Optional[List[Tuple[int, int]]] = None,
448
  debug_dir: Optional[str] = None,
 
449
  ) -> Optional[Dict[str, Any]]:
450
  """Prompt-based SAM 2.1 credit card detection.
451
 
@@ -528,13 +555,26 @@ def detect_credit_card_sam_prompt(
528
  iou_scores = outputs.iou_scores.cpu().numpy()[0]
529
  infer_time = time.time() - t0
530
 
 
 
 
 
 
 
 
 
 
 
 
 
 
531
  scored: List[Dict[str, Any]] = []
532
  scaled_candidate_masks: List[np.ndarray] = []
533
  for prompt_idx in range(masks_tensor.shape[0]):
534
  for cand_idx in range(masks_tensor.shape[1]):
535
  mask_scaled = masks_tensor[prompt_idx, cand_idx].numpy().astype(bool)
536
  scaled_candidate_masks.append(mask_scaled)
537
- result = _score_card_mask(mask_scaled, scaled_area)
538
  if result is not None:
539
  result["seed_idx"] = prompt_idx
540
  result["cand_idx"] = cand_idx
 
43
  # Candidate filtering
44
  MIN_RECTANGULARITY = 0.90 # mask_area / minAreaRect_area; card mask is near-perfect rectangle
45
  ASPECT_RATIO_TOLERANCE = 0.15 # fractional deviation from 1.586
46
+ MAX_HAND_OVERLAP_RATIO = 0.20 # reject candidates that swallow the hand (background paper, tabletop)
47
 
48
  _pipeline = None # lazy singleton
49
 
 
114
  def _score_card_mask(
115
  mask: np.ndarray,
116
  image_area: float,
117
+ hand_mask: Optional[np.ndarray] = None,
118
  ) -> Optional[Dict[str, Any]]:
119
  """Score a candidate mask for being a credit card.
120
 
 
144
  if contour_area <= 0:
145
  return None
146
 
147
+ # Reject candidates whose convex hull engulfs the hand. When SAM is
148
+ # prompted to segment the background paper, it returns the paper mask
149
+ # with the hand carved *out* of it — so raw AND(mask, hand) is ~0
150
+ # even though the hand sits visually on top of the paper. The convex
151
+ # hull closes that hand-shaped hole, exposing the engulfment.
152
+ if hand_mask is not None and mask.shape == hand_mask.shape:
153
+ hand_bool = hand_mask.astype(bool) if hand_mask.dtype != bool else hand_mask
154
+ hand_area = float(hand_bool.sum())
155
+ if hand_area > 0:
156
+ hull_mask = np.zeros(mask.shape, dtype=np.uint8)
157
+ cv2.fillPoly(hull_mask, [contour.astype(np.int32)], 255)
158
+ hull_bool = hull_mask.astype(bool)
159
+ overlap = float(np.logical_and(hull_bool, hand_bool).sum())
160
+ if overlap / hand_area > MAX_HAND_OVERLAP_RATIO:
161
+ return None
162
+
163
  rect = cv2.minAreaRect(contour)
164
  box = cv2.boxPoints(rect)
165
  rect_area = cv2.contourArea(box.astype(np.float32))
 
341
  hand_mask: np.ndarray,
342
  image_shape: Tuple[int, int],
343
  anchor_xy: Tuple[int, int],
344
+ ) -> Dict[str, List[Tuple[int, int]]]:
345
  """Cross-shaped seed points through the anchor (middle-finger PIP).
346
 
347
  Users place the credit card either above/below or left/right of the
348
  middle-finger PIP, so a rake along the horizontal and vertical lines
349
  through PIP catches it with far fewer prompts than a dense grid.
350
+ 5 points per arm (step 0.15·dim), shared center deduped.
351
+
352
+ Returns a dict with two lists:
353
+ - "kept": seeds that passed the hand-mask filter (sent to SAM).
354
+ - "dropped": seeds whose (x, y) landed inside the hand mask and
355
+ were filtered out. Retained purely for debug visualization.
356
  """
357
  h, w = image_shape
358
  mask_bool = hand_mask.astype(bool) if hand_mask.dtype != bool else hand_mask
 
368
  for k in (-2, -1, 1, 2):
369
  candidates.append((ax, ay + k * dy))
370
 
371
+ kept: List[Tuple[int, int]] = []
372
+ dropped: List[Tuple[int, int]] = []
373
  seen: set = set()
374
  for px, py in candidates:
375
  px = max(0, min(w - 1, px))
376
  py = max(0, min(h - 1, py))
377
+ if (px, py) in seen:
378
  continue
379
  seen.add((px, py))
380
+ if mask_bool[py, px]:
381
+ dropped.append((px, py))
382
+ else:
383
+ kept.append((px, py))
384
+ return {"kept": kept, "dropped": dropped}
385
 
386
 
387
  def _downscale_prompt(image_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
 
472
  seed_points: List[Tuple[int, int]],
473
  negative_points: Optional[List[Tuple[int, int]]] = None,
474
  debug_dir: Optional[str] = None,
475
+ hand_mask: Optional[np.ndarray] = None,
476
  ) -> Optional[Dict[str, Any]]:
477
  """Prompt-based SAM 2.1 credit card detection.
478
 
 
555
  iou_scores = outputs.iou_scores.cpu().numpy()[0]
556
  infer_time = time.time() - t0
557
 
558
+ # Resize the hand mask into the same scaled 1024-space the candidate
559
+ # masks live in, so overlap rejection works without upscaling every
560
+ # candidate to full resolution.
561
+ hand_mask_scaled: Optional[np.ndarray] = None
562
+ if hand_mask is not None:
563
+ hand_u8 = (hand_mask.astype(bool).astype(np.uint8) * 255)
564
+ if hand_u8.shape != (scaled_h, scaled_w):
565
+ hand_u8 = cv2.resize(
566
+ hand_u8, (scaled_w, scaled_h),
567
+ interpolation=cv2.INTER_NEAREST,
568
+ )
569
+ hand_mask_scaled = hand_u8.astype(bool)
570
+
571
  scored: List[Dict[str, Any]] = []
572
  scaled_candidate_masks: List[np.ndarray] = []
573
  for prompt_idx in range(masks_tensor.shape[0]):
574
  for cand_idx in range(masks_tensor.shape[1]):
575
  mask_scaled = masks_tensor[prompt_idx, cand_idx].numpy().astype(bool)
576
  scaled_candidate_masks.append(mask_scaled)
577
+ result = _score_card_mask(mask_scaled, scaled_area, hand_mask=hand_mask_scaled)
578
  if result is not None:
579
  result["seed_idx"] = prompt_idx
580
  result["cand_idx"] = cand_idx