feng-x committed on
Commit
6cd4ed9
·
verified ·
1 Parent(s): 947c3d4

Upload folder using huggingface_hub

Browse files
src/sam_card_detection.py CHANGED
@@ -48,12 +48,24 @@ MAX_HULL_HAND_FILL_RATIO = 0.05
48
  # aspect ratio purely by accident. A real credit card held alongside a hand
49
  # is ~5-15% of the frame; 25% is already 2× the realistic maximum.
50
  SAM_MAX_CARD_AREA_RATIO = 0.25
 
 
 
 
 
 
 
 
 
 
51
 
52
 
53
  def _score_card_mask(
54
  mask: np.ndarray,
55
  image_area: float,
56
  hand_mask: Optional[np.ndarray] = None,
 
 
57
  ) -> Optional[Dict[str, Any]]:
58
  """Score a candidate mask for being a credit card.
59
 
@@ -77,12 +89,21 @@ def _score_card_mask(
77
  # x86 vs Apple Silicon can bump `contour_area / rect_area` below 0.90 purely
78
  # from Torch CPU activation drift). Non-card shapes stay non-rectangular
79
  # under their hull, so this does not create false positives.
80
- contour = max(contours, key=cv2.contourArea)
81
- contour = cv2.convexHull(contour)
82
  contour_area = cv2.contourArea(contour)
83
  if contour_area <= 0:
84
  return None
85
 
 
 
 
 
 
 
 
 
 
86
  # Reject candidates whose convex hull engulfs the hand. When SAM is
87
  # prompted to segment the background paper, it returns the paper mask
88
  # with the hand carved *out* of it — so raw AND(mask, hand) is ~0
@@ -116,16 +137,43 @@ def _score_card_mask(
116
  if width <= 0 or height <= 0:
117
  return None
118
 
 
 
 
 
 
 
 
 
 
119
  aspect_ratio = max(width, height) / min(width, height)
120
  ratio_diff = abs(aspect_ratio - CARD_ASPECT_RATIO) / CARD_ASPECT_RATIO
121
  if ratio_diff > ASPECT_RATIO_TOLERANCE:
122
  return None
123
 
124
- # Higher score: better rectangularity + tighter aspect ratio match + meaningful size
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  ratio_score = 1.0 - ratio_diff / ASPECT_RATIO_TOLERANCE
126
  rect_score = (rectangularity - MIN_RECTANGULARITY) / (1.0 - MIN_RECTANGULARITY)
127
  area_score = min(area_ratio / 0.1, 1.0) # caps at 10% of image area
128
- score = 0.4 * ratio_score + 0.4 * rect_score + 0.2 * area_score
 
 
 
 
 
129
 
130
  return {
131
  "corners": corners,
@@ -400,12 +448,20 @@ def detect_credit_card_sam_prompt(
400
  for cand_idx in range(masks_tensor.shape[1]):
401
  mask_scaled = masks_tensor[prompt_idx, cand_idx].numpy().astype(bool)
402
  scaled_candidate_masks.append(mask_scaled)
403
- result = _score_card_mask(mask_scaled, scaled_area, hand_mask=hand_mask_scaled)
 
 
 
 
 
404
  if result is not None:
405
  result["seed_idx"] = prompt_idx
406
  result["cand_idx"] = cand_idx
407
- result["iou_score"] = float(iou_scores[prompt_idx, cand_idx])
408
- result["mask_scaled"] = mask_scaled
 
 
 
409
  scored.append(result)
410
 
411
  scored.sort(key=lambda d: d["score"], reverse=True)
 
48
  # aspect ratio purely by accident. A real credit card held alongside a hand
49
  # is ~5-15% of the frame; 25% is already 2× the realistic maximum.
50
  SAM_MAX_CARD_AREA_RATIO = 0.25
51
+ # Reject candidates whose longer side spans more of the image short side
52
+ # than any real card photo plausibly would. This catches the distinctive
53
+ # SAM failure where a single-prompt mask grabs the entire background paper
54
+ # / tabletop: the candidate is long and thin (so its mask area sneaks
55
+ # under SAM_MAX_CARD_AREA_RATIO) but its bounding rectangle stretches
56
+ # across nearly the full image short side (framing ratio ~0.99). Threshold
57
+ # picked from doc/report/framing_ratio_survey.md: max observed in 47 KOL
58
+ # successes is 0.532, max in calibration is 0.486; 0.70 leaves ≥30% margin
59
+ # above legitimate framing while sitting well below the ~1.0 failure mode.
60
+ MAX_CARD_FRAMING_RATIO = 0.70
61
 
62
 
63
  def _score_card_mask(
64
  mask: np.ndarray,
65
  image_area: float,
66
  hand_mask: Optional[np.ndarray] = None,
67
+ image_short_side: float = 0.0,
68
+ iou_score: float = 0.0,
69
  ) -> Optional[Dict[str, Any]]:
70
  """Score a candidate mask for being a credit card.
71
 
 
89
  # x86 vs Apple Silicon can bump `contour_area / rect_area` below 0.90 purely
90
  # from Torch CPU activation drift). Non-card shapes stay non-rectangular
91
  # under their hull, so this does not create false positives.
92
+ largest_contour = max(contours, key=cv2.contourArea)
93
+ contour = cv2.convexHull(largest_contour)
94
  contour_area = cv2.contourArea(contour)
95
  if contour_area <= 0:
96
  return None
97
 
98
+ # Replace the raw multi-blob SAM mask with just the largest connected
99
+ # component. The card prompt with multimask_output=True occasionally lassos
100
+ # background paper between fingers as part of the same candidate; those
101
+ # blobs pass scoring (we only check the largest contour) but pollute every
102
+ # downstream consumer of `result["mask"]` (debug overlays, the result PNG).
103
+ clean_mask_u8 = np.zeros_like(mask_u8)
104
+ cv2.drawContours(clean_mask_u8, [largest_contour], -1, 255, thickness=cv2.FILLED)
105
+ mask = clean_mask_u8.astype(bool)
106
+
107
  # Reject candidates whose convex hull engulfs the hand. When SAM is
108
  # prompted to segment the background paper, it returns the paper mask
109
  # with the hand carved *out* of it — so raw AND(mask, hand) is ~0
 
137
  if width <= 0 or height <= 0:
138
  return None
139
 
140
+ # Reject long-thin SAM false positives that span ~the entire image short
141
+ # side. These slip past SAM_MAX_CARD_AREA_RATIO because their pixel
142
+ # count is modest (the mask is hollow / not solidly filled), but their
143
+ # bounding rectangle gives them away.
144
+ if image_short_side > 0:
145
+ framing_ratio = max(width, height) / image_short_side
146
+ if framing_ratio > MAX_CARD_FRAMING_RATIO:
147
+ return None
148
+
149
  aspect_ratio = max(width, height) / min(width, height)
150
  ratio_diff = abs(aspect_ratio - CARD_ASPECT_RATIO) / CARD_ASPECT_RATIO
151
  if ratio_diff > ASPECT_RATIO_TOLERANCE:
152
  return None
153
 
154
+ # Score components — picking weights here is delicate because real
155
+ # photos have *perspective foreshortening* that pulls the apparent card
156
+ # aspect away from the flat-card ideal of 1.586. A mask that bleeds
157
+ # extra background paper onto the short edge can pull aspect *closer*
158
+ # to the ideal than a tight mask, so over-weighting ratio_score selects
159
+ # fattened masks (the Brooklyn Shields case). The current split:
160
+ # * 0.3 ratio — kept as a soft preference but no longer dominant
161
+ # * 0.4 rect — primary signal; tight cards are near-perfect rectangles,
162
+ # fattened SAM masks always lose a little here
163
+ # * 0.1 area — small reward for "actually card-sized"
164
+ # * 0.2 iou — SAM's own segmentation confidence; stable across
165
+ # platforms because it's decoder-internal, not derived
166
+ # from per-pixel boundary noise. Acts as a second opinion
167
+ # that breaks the tie when geometry is too close to call.
168
  ratio_score = 1.0 - ratio_diff / ASPECT_RATIO_TOLERANCE
169
  rect_score = (rectangularity - MIN_RECTANGULARITY) / (1.0 - MIN_RECTANGULARITY)
170
  area_score = min(area_ratio / 0.1, 1.0) # caps at 10% of image area
171
+ score = (
172
+ 0.3 * ratio_score
173
+ + 0.4 * rect_score
174
+ + 0.1 * area_score
175
+ + 0.2 * iou_score
176
+ )
177
 
178
  return {
179
  "corners": corners,
 
448
  for cand_idx in range(masks_tensor.shape[1]):
449
  mask_scaled = masks_tensor[prompt_idx, cand_idx].numpy().astype(bool)
450
  scaled_candidate_masks.append(mask_scaled)
451
+ iou = float(iou_scores[prompt_idx, cand_idx])
452
+ result = _score_card_mask(
453
+ mask_scaled, scaled_area, hand_mask=hand_mask_scaled,
454
+ image_short_side=float(min(scaled_h, scaled_w)),
455
+ iou_score=iou,
456
+ )
457
  if result is not None:
458
  result["seed_idx"] = prompt_idx
459
  result["cand_idx"] = cand_idx
460
+ result["iou_score"] = iou
461
+ # `result["mask"]` is the cleaned (largest-component) mask;
462
+ # keep that as the scaled-space mask so upscaling and debug
463
+ # rendering both see the cleaned version.
464
+ result["mask_scaled"] = result["mask"]
465
  scored.append(result)
466
 
467
  scored.sort(key=lambda d: d["score"], reverse=True)
web_demo/static/mobile/steps/guide.js CHANGED
@@ -26,7 +26,7 @@ export default {
26
  <ul class="capture-tips">
27
  <li>Place a card of <strong>standard credit card size</strong> beside your hand.</li>
28
  <li>Hold phone <strong>directly above hand</strong>, parallel to table.</li>
29
- <li>Use <strong>plain white background</strong>, a sheet of paper works great.</li>
30
  </ul>
31
 
32
  <figure class="guide-example">
 
26
  <ul class="capture-tips">
27
  <li>Place a card of <strong>standard credit card size</strong> beside your hand.</li>
28
  <li>Hold phone <strong>directly above hand</strong>, parallel to table.</li>
29
+ <li>Use <strong>flat, plain background</strong>, a sheet of paper works great.</li>
30
  </ul>
31
 
32
  <figure class="guide-example">