feng-x committed on
Commit
22df1ea
·
verified ·
1 Parent(s): 31bf511

Upload folder using huggingface_hub

Browse files
measure_finger.py CHANGED
@@ -13,13 +13,17 @@ import argparse
13
  import json
14
  import sys
15
  from pathlib import Path
16
- from typing import Optional, Dict, Any, Literal
17
 
18
  import cv2
19
  import numpy as np
20
 
21
  from src.image_quality import assess_image_quality
22
  from src.card_detection import detect_credit_card, compute_scale_factor
 
 
 
 
23
  from src.finger_segmentation import segment_hand, isolate_finger, clean_mask, get_finger_contour
24
  from src.geometry import estimate_finger_axis, localize_ring_zone, localize_ring_zone_from_landmarks, compute_cross_section_width
25
  from src.edge_refinement import refine_edges_sobel, should_use_sobel_measurement, compare_edge_methods
@@ -30,7 +34,7 @@ from src.confidence import (
30
  compute_edge_quality_confidence,
31
  compute_overall_confidence,
32
  )
33
- from src.debug_observer import draw_comprehensive_edge_overlay
34
  from src.ring_size import recommend_ring_size, aggregate_ring_sizes, VALID_RING_MODELS, DEFAULT_RING_MODEL
35
  from src.image_quality import (
36
  check_card_in_frame,
@@ -114,9 +118,9 @@ Examples:
114
  parser.add_argument(
115
  "--edge-method",
116
  type=str,
117
- default="auto",
118
- choices=["auto", "contour", "sobel", "compare"],
119
- help="Edge detection method: auto (quality-based), contour (v0), sobel (v1), compare (both) (default: auto)",
120
  )
121
  parser.add_argument(
122
  "--sobel-threshold",
@@ -168,6 +172,20 @@ Examples:
168
  action="store_true",
169
  help="[TESTING ONLY] Skip card detection and use dummy scale (allows testing finger segmentation without card)",
170
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  return parser.parse_args()
173
 
@@ -270,6 +288,162 @@ def save_output(output: Dict[str, Any], output_path: str) -> None:
270
  json.dump(output, f, indent=2)
271
 
272
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  def measure_finger(
274
  image: np.ndarray,
275
  finger_index: FingerIndex = "index",
@@ -277,11 +451,13 @@ def measure_finger(
277
  save_intermediate: bool = False,
278
  result_png_path: Optional[str] = None,
279
  save_debug: bool = False,
280
- edge_method: str = "auto",
281
  sobel_threshold: float = 15.0,
282
  sobel_kernel_size: int = 3,
283
  use_subpixel: bool = True,
284
  skip_card_detection: bool = False,
 
 
285
  ring_model: str = DEFAULT_RING_MODEL,
286
  ) -> Dict[str, Any]:
287
  """
@@ -302,16 +478,14 @@ def measure_finger(
302
  Returns:
303
  Output dictionary with measurement results
304
  """
305
- # Phase 2: Image quality check
306
  quality = assess_image_quality(image)
307
  print(f"Image quality: blur={quality['blur_score']:.1f}, "
308
  f"brightness={quality['brightness']:.1f}, "
309
  f"contrast={quality['contrast']:.1f}")
310
-
311
  if not quality["passed"]:
312
  for issue in quality["issues"]:
313
- print(f" Warning: {issue}")
314
- return create_output(fail_reason=quality["fail_reason"])
315
 
316
  # Phase 3: Hand & finger segmentation (MOVED BEFORE CARD DETECTION)
317
  # This allows us to rotate the image to canonical orientation first
@@ -320,7 +494,12 @@ def measure_finger(
320
  if save_debug and result_png_path is not None:
321
  finger_debug_dir = str(Path(result_png_path).parent / "finger_segmentation_debug")
322
 
323
- hand_data = segment_hand(image, finger=finger_index, debug_dir=finger_debug_dir)
 
 
 
 
 
324
 
325
  if hand_data is None:
326
  print("No hand detected in image")
@@ -358,7 +537,12 @@ def measure_finger(
358
  view_angle_ok = True
359
  card_detected = False
360
  else:
361
- card_result = detect_credit_card(image_canonical, debug_dir=card_debug_dir)
 
 
 
 
 
362
 
363
  if card_result is None:
364
  print("Credit card not detected in image")
@@ -390,6 +574,12 @@ def measure_finger(
390
 
391
  # Phase 5: Finger isolation (hand already segmented in Phase 3)
392
  h_can, w_can = image_canonical.shape[:2]
 
 
 
 
 
 
393
  finger_data = isolate_finger(hand_data, finger=finger_index, image_shape=(h_can, w_can))
394
 
395
  if finger_data is None:
@@ -497,6 +687,16 @@ def measure_finger(
497
  borderValue=0
498
  )
499
 
 
 
 
 
 
 
 
 
 
 
500
  print(f"Rotation applied: {angle_from_vertical:.1f}° CW, finger now vertical")
501
  else:
502
  print(f"Finger axis is {angle_from_vertical:.1f}° from vertical (within {rotation_threshold}° threshold, no rotation needed)")
@@ -560,15 +760,33 @@ def measure_finger(
560
  sobel_measurement = None
561
  sobel_failed = False
562
 
563
- if edge_method in ["sobel", "auto", "compare"]:
564
  try:
565
- print(f"Running Sobel edge refinement (threshold={sobel_threshold}, kernel={sobel_kernel_size})...")
566
-
 
 
 
 
 
 
 
 
 
 
567
  # Create debug directory for edge refinement if debug enabled
568
  edge_debug_dir = None
569
  if save_debug and result_png_path is not None:
570
  edge_debug_dir = str(Path(result_png_path).parent / "edge_refinement_debug")
571
-
 
 
 
 
 
 
 
 
572
  sobel_measurement = refine_edges_sobel(
573
  image=image_canonical, # Use canonical orientation
574
  axis_data=axis_data,
@@ -578,27 +796,29 @@ def measure_finger(
578
  sobel_threshold=sobel_threshold,
579
  kernel_size=sobel_kernel_size,
580
  use_subpixel=use_subpixel,
 
581
  debug_dir=edge_debug_dir,
 
 
582
  )
583
 
584
  sobel_width_cm = sobel_measurement["median_width_cm"]
585
- print(f"Sobel width: {sobel_width_cm:.4f}cm "
586
  f"({sobel_measurement['num_samples']} samples, "
587
  f"std={sobel_measurement['std_width_px']:.2f}px, "
588
  f"quality={sobel_measurement['edge_quality']['overall_score']:.3f})")
589
 
590
  except Exception as e:
591
- print(f"Sobel edge refinement failed: {e}")
592
  sobel_failed = True
593
- if edge_method == "sobel":
594
- # User explicitly requested Sobel, fail if it doesn't work
595
  return create_output(
596
  card_detected=card_detected,
597
  finger_detected=True,
598
  scale_px_per_cm=px_per_cm,
599
  view_angle_ok=view_angle_ok,
600
  fail_reason="sobel_edge_refinement_failed",
601
- edge_method_used="sobel",
602
  )
603
 
604
  # Select measurement method based on edge_method flag
@@ -616,6 +836,12 @@ def measure_finger(
616
  median_width_cm = sobel_measurement["median_width_cm"]
617
  edge_method_used = "sobel"
618
 
 
 
 
 
 
 
619
  elif edge_method == "auto":
620
  # Automatic selection based on quality
621
  if sobel_measurement and not sobel_failed:
@@ -684,7 +910,7 @@ def measure_finger(
684
 
685
  # Calculate edge quality confidence (v1)
686
  edge_quality_conf = None
687
- if edge_method_used in ["sobel", "compare"]:
688
  edge_quality_conf = compute_edge_quality_confidence(
689
  final_measurement.get("edge_quality")
690
  )
@@ -694,7 +920,7 @@ def measure_finger(
694
  card_conf,
695
  finger_conf,
696
  measurement_conf,
697
- edge_method="sobel" if edge_method_used in ["sobel", "compare"] else "contour",
698
  edge_quality_confidence=edge_quality_conf,
699
  )
700
 
@@ -717,7 +943,7 @@ def measure_finger(
717
  print(f"Generating result visualization...")
718
 
719
  # Use comprehensive edge overlay (based on Sobel data) + card bounding box
720
- if edge_method_used in ["sobel", "compare"] and sobel_measurement and not sobel_failed:
721
  edge_data = sobel_measurement["edge_data"]
722
  roi_bounds = sobel_measurement["roi_data"]["roi_bounds"]
723
  width_data = sobel_measurement["width_data"]
@@ -747,6 +973,25 @@ def measure_finger(
747
  # Fallback: plain image with axis/zone annotations when Sobel unavailable
748
  debug_image = image_canonical.copy()
749
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
750
  # Draw card bounding box (transform corners if image was rotated)
751
  if card_result is not None and "corners" in card_result:
752
  corners = card_result["corners"]
@@ -758,9 +1003,8 @@ def measure_finger(
758
  cv2.polylines(debug_image, [pts], isClosed=True,
759
  color=(0, 255, 0), thickness=3, lineType=cv2.LINE_AA)
760
 
761
- # Save result image
762
- Path(result_png_path).parent.mkdir(parents=True, exist_ok=True)
763
- cv2.imwrite(result_png_path, debug_image)
764
  print(f"Result visualization saved to: {result_png_path}")
765
 
766
 
@@ -789,7 +1033,7 @@ def _measure_single_finger_from_shared(
789
  view_angle_ok: bool,
790
  card_result: Optional[Dict[str, Any]],
791
  scale_confidence: float,
792
- edge_method: str = "sobel",
793
  sobel_threshold: float = 15.0,
794
  sobel_kernel_size: int = 3,
795
  use_subpixel: bool = True,
@@ -807,6 +1051,7 @@ def _measure_single_finger_from_shared(
807
  )
808
 
809
  h_can, w_can = image_canonical.shape[:2]
 
810
  finger_data = isolate_finger(hand_data, finger=finger_name, image_shape=(h_can, w_can))
811
 
812
  if finger_data is None:
@@ -858,6 +1103,11 @@ def _measure_single_finger_from_shared(
858
  cleaned_mask, rotation_matrix, (w_can, h_can),
859
  flags=cv2.INTER_NEAREST, borderMode=cv2.BORDER_CONSTANT, borderValue=0,
860
  )
 
 
 
 
 
861
 
862
  # Ring zone
863
  try:
@@ -891,21 +1141,34 @@ def _measure_single_finger_from_shared(
891
  # Sobel measurement
892
  sobel_measurement = None
893
  sobel_failed = False
894
- if edge_method in ["sobel", "auto", "compare"]:
 
 
 
 
 
 
895
  try:
 
 
 
 
896
  sobel_measurement = refine_edges_sobel(
897
  image=img_work, axis_data=axis_data, zone_data=zone_data,
898
  scale_px_per_cm=px_per_cm, finger_landmarks=finger_data.get("landmarks"),
899
  sobel_threshold=sobel_threshold, kernel_size=sobel_kernel_size,
900
  use_subpixel=use_subpixel,
 
 
 
901
  )
902
  except Exception:
903
  sobel_failed = True
904
- if edge_method == "sobel":
905
  return create_output(
906
  card_detected=card_detected, finger_detected=True,
907
  scale_px_per_cm=px_per_cm, view_angle_ok=view_angle_ok,
908
- fail_reason="sobel_edge_refinement_failed", edge_method_used="sobel",
909
  )
910
 
911
  # Select method
@@ -917,6 +1180,10 @@ def _measure_single_finger_from_shared(
917
  median_width_cm = sobel_measurement["median_width_cm"]
918
  edge_method_used = "sobel"
919
  final_measurement = sobel_measurement
 
 
 
 
920
  elif edge_method == "auto":
921
  if sobel_measurement and not sobel_failed:
922
  should_use, _ = should_use_sobel_measurement(sobel_measurement, contour_measurement)
@@ -947,11 +1214,11 @@ def _measure_single_finger_from_shared(
947
  finger_conf = compute_finger_confidence(hand_data, finger_data, mask_area, image_area)
948
  measurement_conf = compute_measurement_confidence(final_measurement, median_width_cm)
949
  edge_quality_conf = None
950
- if edge_method_used in ["sobel", "compare"]:
951
  edge_quality_conf = compute_edge_quality_confidence(final_measurement.get("edge_quality"))
952
  confidence_breakdown = compute_overall_confidence(
953
  card_conf, finger_conf, measurement_conf,
954
- edge_method="sobel" if edge_method_used in ["sobel", "compare"] else "contour",
955
  edge_quality_confidence=edge_quality_conf,
956
  )
957
 
@@ -978,12 +1245,14 @@ def measure_multi_finger(
978
  confidence_threshold: float = 0.7,
979
  result_png_path: Optional[str] = None,
980
  save_debug: bool = False,
981
- edge_method: str = "sobel",
982
  sobel_threshold: float = 15.0,
983
  sobel_kernel_size: int = 3,
984
  use_subpixel: bool = True,
985
  skip_card_detection: bool = False,
986
  no_calibration: bool = False,
 
 
987
  ring_model: str = DEFAULT_RING_MODEL,
988
  ) -> Dict[str, Any]:
989
  """Measure index, middle, and ring fingers from a single image.
@@ -996,14 +1265,13 @@ def measure_multi_finger(
996
  """
997
  from src.finger_segmentation import FINGER_LANDMARKS
998
 
999
- # Phase 1: Image quality
1000
  quality = assess_image_quality(image)
1001
  print(f"[multi] Image quality: blur={quality['blur_score']:.1f}, "
1002
  f"brightness={quality['brightness']:.1f}, contrast={quality['contrast']:.1f}")
1003
  if not quality["passed"]:
1004
  for issue in quality["issues"]:
1005
- print(f" Warning: {issue}")
1006
- return {"fail_reason": quality["fail_reason"], "per_finger": {}, "fingers_measured": 0, "fingers_succeeded": 0}
1007
 
1008
  # Lighting uniformity check
1009
  lighting = check_lighting_uniformity(image)
@@ -1015,7 +1283,12 @@ def measure_multi_finger(
1015
  if save_debug and result_png_path is not None:
1016
  finger_debug_dir = str(Path(result_png_path).parent / "finger_segmentation_debug")
1017
 
1018
- hand_data = segment_hand(image, finger="index", debug_dir=finger_debug_dir)
 
 
 
 
 
1019
  if hand_data is None:
1020
  print("[multi] No hand detected")
1021
  return {"fail_reason": "hand_not_detected", "per_finger": {}, "fingers_measured": 0, "fingers_succeeded": 0}
@@ -1035,7 +1308,12 @@ def measure_multi_finger(
1035
  view_angle_ok = True
1036
  card_detected = False
1037
  else:
1038
- card_result = detect_credit_card(image_canonical, debug_dir=card_debug_dir)
 
 
 
 
 
1039
  if card_result is None:
1040
  return {"fail_reason": "card_not_detected", "per_finger": {}, "fingers_measured": 0, "fingers_succeeded": 0}
1041
  px_per_cm, scale_confidence = compute_scale_factor(card_result["corners"])
@@ -1113,6 +1391,8 @@ def measure_multi_finger(
1113
  card_result=card_result,
1114
  px_per_cm=px_per_cm,
1115
  result_png_path=result_png_path,
 
 
1116
  )
1117
 
1118
  # Clean internal data from output
@@ -1136,6 +1416,8 @@ def _draw_multi_finger_debug(
1136
  card_result: Optional[Dict[str, Any]],
1137
  px_per_cm: float,
1138
  result_png_path: str,
 
 
1139
  ) -> None:
1140
  """Generate debug visualization for multi-finger measurement.
1141
 
@@ -1154,7 +1436,18 @@ def _draw_multi_finger_debug(
1154
  vis = image_canonical.copy()
1155
  h, w = vis.shape[:2]
1156
 
1157
- # Draw card
 
 
 
 
 
 
 
 
 
 
 
1158
  if card_result is not None:
1159
  vis = draw_card_overlay(vis, card_result, px_per_cm)
1160
 
@@ -1227,8 +1520,7 @@ def _draw_multi_finger_debug(
1227
  Color.GREEN, 1, cv2.LINE_AA)
1228
  count += 1
1229
 
1230
- Path(result_png_path).parent.mkdir(parents=True, exist_ok=True)
1231
- cv2.imwrite(result_png_path, vis)
1232
  print(f"\n[multi] Debug visualization saved to: {result_png_path}")
1233
 
1234
 
@@ -1266,6 +1558,8 @@ def main() -> int:
1266
  use_subpixel=not args.no_subpixel,
1267
  skip_card_detection=args.skip_card_detection,
1268
  no_calibration=args.no_calibration,
 
 
1269
  ring_model=args.ring_model,
1270
  )
1271
 
@@ -1301,6 +1595,8 @@ def main() -> int:
1301
  sobel_kernel_size=args.sobel_kernel_size,
1302
  use_subpixel=not args.no_subpixel,
1303
  skip_card_detection=args.skip_card_detection,
 
 
1304
  ring_model=args.ring_model,
1305
  )
1306
 
 
13
  import json
14
  import sys
15
  from pathlib import Path
16
+ from typing import Optional, Dict, Any, List, Literal, Tuple
17
 
18
  import cv2
19
  import numpy as np
20
 
21
  from src.image_quality import assess_image_quality
22
  from src.card_detection import detect_credit_card, compute_scale_factor
23
+ from src.sam_card_detection import (
24
+ detect_credit_card_sam_prompt,
25
+ suggest_card_seeds,
26
+ )
27
  from src.finger_segmentation import segment_hand, isolate_finger, clean_mask, get_finger_contour
28
  from src.geometry import estimate_finger_axis, localize_ring_zone, localize_ring_zone_from_landmarks, compute_cross_section_width
29
  from src.edge_refinement import refine_edges_sobel, should_use_sobel_measurement, compare_edge_methods
 
34
  compute_edge_quality_confidence,
35
  compute_overall_confidence,
36
  )
37
+ from src.debug_observer import draw_comprehensive_edge_overlay, draw_hand_skeleton
38
  from src.ring_size import recommend_ring_size, aggregate_ring_sizes, VALID_RING_MODELS, DEFAULT_RING_MODEL
39
  from src.image_quality import (
40
  check_card_in_frame,
 
118
  parser.add_argument(
119
  "--edge-method",
120
  type=str,
121
+ default="mask",
122
+ choices=["auto", "contour", "sobel", "mask", "compare"],
123
+ help="Edge detection method: auto (quality-based), contour (v0), sobel (pure Sobel gradient, no SAM mask), mask (SAM mask boundary only, no Sobel), compare (both) (default: mask)",
124
  )
125
  parser.add_argument(
126
  "--sobel-threshold",
 
172
  action="store_true",
173
  help="[TESTING ONLY] Skip card detection and use dummy scale (allows testing finger segmentation without card)",
174
  )
175
+ parser.add_argument(
176
+ "--card-method",
177
+ type=str,
178
+ choices=["classic", "sam"],
179
+ default="classic",
180
+ help="Card detection backend: 'classic' (Canny/adaptive/Otsu/color waterfall) or 'sam' (SAM 2.1 mask segmentation). Default: classic.",
181
+ )
182
+ parser.add_argument(
183
+ "--hand-mask",
184
+ type=str,
185
+ choices=["synthetic", "sam"],
186
+ default="sam",
187
+ help="Hand mask source: 'synthetic' (MediaPipe landmark convex hull) or 'sam' (SAM 2.1 pixel-accurate). Default: sam.",
188
+ )
189
 
190
  return parser.parse_args()
191
 
 
288
  json.dump(output, f, indent=2)
289
 
290
 
291
+ # Debug visualisations are for human inspection, so there's no reason to
292
+ # write a 12-megapixel PNG (encoding alone can take 1–2s on CPU). Cap the
293
+ # long side and encode as JPEG — the on-disk path keeps its .png extension
294
+ # for backwards compat with existing callers, but we always write JPEG bytes
295
+ # (whether or not a downscale happened) to keep encoding well under ~100ms.
296
+ _DEBUG_VIS_MAX_LONG_SIDE = 1600
297
+
298
+
299
def _overlay_hand_skeleton(
    image: np.ndarray,
    landmarks: Optional[np.ndarray],
    rotation_matrix: Optional[np.ndarray] = None,
) -> np.ndarray:
    """Render the hand-landmark skeleton on top of a debug image.

    The landmarks are expected in the same frame as the canonical image.
    When the caller rotated that image to make the finger vertical, it
    should hand over the same ``rotation_matrix`` so the points are moved
    into the rotated frame before drawing.

    Args:
        image: Debug image to draw on.
        landmarks: Hand landmarks; fewer than 21 entries (or ``None``)
            means nothing is drawn.
        rotation_matrix: Optional rotation to apply to the landmarks first.

    Returns:
        ``image`` itself when there is nothing to draw, otherwise whatever
        ``draw_hand_skeleton`` returns.
    """
    has_full_hand = landmarks is not None and len(landmarks) >= 21
    if not has_full_hand:
        return image

    points = np.asarray(landmarks, dtype=np.float64)
    if rotation_matrix is None:
        return draw_hand_skeleton(image, points)

    # Imported lazily, only on the path that actually needs it.
    from src.geometry import transform_points_rotation

    rotated_points = transform_points_rotation(points, rotation_matrix)
    return draw_hand_skeleton(image, rotated_points)
317
+
318
+
319
def _overlay_sam_masks(
    image: np.ndarray,
    hand_mask: Optional[np.ndarray] = None,
    card_mask: Optional[np.ndarray] = None,
    rotation_matrix: Optional[np.ndarray] = None,
) -> np.ndarray:
    """Blend the hand and card masks into a debug image.

    Each mask is painted as a translucent fill and then outlined with a
    solid 2 px contour: the hand in cyan, the card in green (BGR colours).
    All fills are applied before any contour, hand before card.

    Args:
        image: Debug image to decorate; it is never modified in place.
        hand_mask: Optional hand mask (non-zero = foreground).
        card_mask: Optional card mask (non-zero = foreground).
        rotation_matrix: Optional affine matrix; when given, every mask is
            warped with it so it lines up with an already-rotated image.

    Returns:
        ``image`` unchanged when both masks are ``None``; otherwise a new
        array carrying the overlays.
    """
    if hand_mask is None and card_mask is None:
        return image

    height, width = image.shape[:2]

    def _to_image_frame(mask: Optional[np.ndarray]) -> Optional[np.ndarray]:
        # Normalise to uint8, match the image size, then apply the rotation.
        if mask is None:
            return None
        if mask.dtype == np.uint8:
            aligned = mask.copy()
        else:
            aligned = (mask > 0).astype(np.uint8) * 255
        if aligned.shape[:2] != (height, width):
            aligned = cv2.resize(
                aligned, (width, height), interpolation=cv2.INTER_NEAREST
            )
        if rotation_matrix is not None:
            aligned = cv2.warpAffine(
                aligned,
                rotation_matrix,
                (width, height),
                flags=cv2.INTER_NEAREST,
                borderMode=cv2.BORDER_CONSTANT,
                borderValue=0,
            )
        return aligned

    # (mask, BGR colour, fill opacity) -- hand first, card second.
    candidate_layers = (
        (_to_image_frame(hand_mask), (255, 255, 0), 0.18),
        (_to_image_frame(card_mask), (0, 255, 0), 0.22),
    )
    layers = [layer for layer in candidate_layers if layer[0] is not None]

    canvas = image.copy()

    # Pass 1: semi-transparent fills.
    for mask_u8, colour, opacity in layers:
        fill = np.zeros_like(canvas)
        fill[mask_u8 > 0] = colour
        canvas = cv2.addWeighted(canvas, 1.0, fill, opacity, 0)

    # Pass 2: solid outlines so the silhouettes stay visible under later overlays.
    for mask_u8, colour, _opacity in layers:
        outlines, _hierarchy = cv2.findContours(
            mask_u8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        cv2.drawContours(canvas, outlines, -1, colour, 2, cv2.LINE_AA)

    return canvas
381
+
382
+
383
def _save_debug_visualization(path: str, image: np.ndarray) -> None:
    """Downscale a debug overlay, JPEG-encode it, and write it to ``path``.

    The image is shrunk (aspect ratio preserved) so that its long side does
    not exceed ``_DEBUG_VIS_MAX_LONG_SIDE``, then encoded as JPEG at quality
    85. JPEG bytes are written regardless of the extension in ``path`` and
    regardless of whether a downscale happened, so a ``.png`` path ends up
    holding JPEG data. The original author chose this to keep the output
    path stable for existing callers while avoiding slow full-size PNG
    encoding.

    Args:
        path: Destination file path; missing parent directories are created.
        image: Image array whose first two dimensions are (height, width).
    """
    Path(path).parent.mkdir(parents=True, exist_ok=True)

    h, w = image.shape[:2]
    long_side = max(h, w)
    if long_side > _DEBUG_VIS_MAX_LONG_SIDE:
        scale = _DEBUG_VIS_MAX_LONG_SIDE / long_side
        # Clamp each side to >= 1 px: for extreme aspect ratios the short
        # side would otherwise round to 0, which cv2.resize rejects.
        new_size = (
            max(1, int(round(w * scale))),
            max(1, int(round(h * scale))),
        )
        image = cv2.resize(image, new_size, interpolation=cv2.INTER_AREA)

    ok, buf = cv2.imencode(".jpg", image, [int(cv2.IMWRITE_JPEG_QUALITY), 85])
    if not ok:
        # In-memory encoding failed; let imwrite pick a codec from the extension.
        cv2.imwrite(path, image)
        return

    with open(path, "wb") as f:
        f.write(buf.tobytes())
405
+
406
+
407
def _sam_card_detect(
    image_canonical: np.ndarray,
    hand_data: Dict[str, Any],
    save_debug: bool,
    result_png_path: Optional[str],
) -> Optional[Dict[str, Any]]:
    """Run prompt-based SAM card detection on the canonical image.

    Positive seed points come from ``suggest_card_seeds`` (driven by the hand
    mask). When usable hand landmarks are present, the centroid of landmarks
    0, 5, 9, 13 and 17 (the palm, per the original naming) is passed as a
    single negative point. There is deliberately no automatic-mask-generation
    retry; per the original author's note, it rarely succeeds where the
    prompt grid failed and is expensive.

    Args:
        image_canonical: Image in canonical orientation.
        hand_data: Hand segmentation result; ``"mask"`` and ``"landmarks"``
            are read from it.
        save_debug: Whether debug artefacts should be written.
        result_png_path: Result image path; its parent directory hosts the
            ``sam_card_prompt_debug`` folder when ``save_debug`` is set.

    Returns:
        Whatever ``detect_credit_card_sam_prompt`` returns, or ``None`` when
        the hand mask is missing or no seed points could be suggested.
    """
    debug_root = (
        Path(result_png_path).parent
        if (save_debug and result_png_path is not None)
        else None
    )

    hand_mask = hand_data.get("mask")
    if hand_mask is None:
        return None

    seeds = suggest_card_seeds(hand_mask, image_canonical.shape[:2])
    if not seeds:
        return None

    negatives: List[Tuple[int, int]] = []
    landmarks = hand_data.get("landmarks")
    if landmarks is not None:
        palm_idx = [0, 5, 9, 13, 17]
        # Coerce first: fancy indexing below needs an ndarray, and the
        # landmarks may arrive as a plain sequence.
        pts = np.asarray(landmarks)
        has_palm_points = (
            pts.ndim == 2
            and pts.shape[0] > max(palm_idx)
            and pts.shape[1] >= 2
        )
        # A short or malformed landmark set only costs us the negative
        # prompt; it should not abort card detection with an IndexError.
        if has_palm_points:
            palm_c = np.mean(pts[palm_idx, :2], axis=0)
            negatives.append((int(round(palm_c[0])), int(round(palm_c[1]))))

    prompt_debug = str(debug_root / "sam_card_prompt_debug") if debug_root else None
    return detect_credit_card_sam_prompt(
        image_canonical,
        seed_points=seeds,
        negative_points=negatives,
        debug_dir=prompt_debug,
    )
445
+
446
+
447
  def measure_finger(
448
  image: np.ndarray,
449
  finger_index: FingerIndex = "index",
 
451
  save_intermediate: bool = False,
452
  result_png_path: Optional[str] = None,
453
  save_debug: bool = False,
454
+ edge_method: str = "mask",
455
  sobel_threshold: float = 15.0,
456
  sobel_kernel_size: int = 3,
457
  use_subpixel: bool = True,
458
  skip_card_detection: bool = False,
459
+ card_method: str = "classic",
460
+ hand_mask_method: str = "sam",
461
  ring_model: str = DEFAULT_RING_MODEL,
462
  ) -> Dict[str, Any]:
463
  """
 
478
  Returns:
479
  Output dictionary with measurement results
480
  """
481
+ # Phase 2: Image quality metrics (informational only — no hard fail)
482
  quality = assess_image_quality(image)
483
  print(f"Image quality: blur={quality['blur_score']:.1f}, "
484
  f"brightness={quality['brightness']:.1f}, "
485
  f"contrast={quality['contrast']:.1f}")
 
486
  if not quality["passed"]:
487
  for issue in quality["issues"]:
488
+ print(f" Note: {issue}")
 
489
 
490
  # Phase 3: Hand & finger segmentation (MOVED BEFORE CARD DETECTION)
491
  # This allows us to rotate the image to canonical orientation first
 
494
  if save_debug and result_png_path is not None:
495
  finger_debug_dir = str(Path(result_png_path).parent / "finger_segmentation_debug")
496
 
497
+ hand_data = segment_hand(
498
+ image,
499
+ finger=finger_index,
500
+ debug_dir=finger_debug_dir,
501
+ use_sam_mask=(hand_mask_method == "sam"),
502
+ )
503
 
504
  if hand_data is None:
505
  print("No hand detected in image")
 
537
  view_angle_ok = True
538
  card_detected = False
539
  else:
540
+ if card_method == "sam":
541
+ card_result = _sam_card_detect(
542
+ image_canonical, hand_data, save_debug, result_png_path
543
+ )
544
+ else:
545
+ card_result = detect_credit_card(image_canonical, debug_dir=card_debug_dir)
546
 
547
  if card_result is None:
548
  print("Credit card not detected in image")
 
574
 
575
  # Phase 5: Finger isolation (hand already segmented in Phase 3)
576
  h_can, w_can = image_canonical.shape[:2]
577
+ # Keep a reference to the raw SAM hand mask (pre-isolation polygon clip).
578
+ # mask_only edge detection needs the untrimmed silhouette — the isolation
579
+ # polygon in _create_finger_roi_mask is only ~1.08x the landmark segment
580
+ # length and can cut into a wider-than-average finger, which would make
581
+ # the mask boundary narrower than the true SAM boundary.
582
+ raw_hand_mask = hand_data.get("mask")
583
  finger_data = isolate_finger(hand_data, finger=finger_index, image_shape=(h_can, w_can))
584
 
585
  if finger_data is None:
 
687
  borderValue=0
688
  )
689
 
690
+ # Also warp the raw SAM hand mask so mask_only mode can read the
691
+ # untrimmed silhouette in the same rotated frame as the image.
692
+ if raw_hand_mask is not None:
693
+ raw_hand_mask = cv2.warpAffine(
694
+ raw_hand_mask, rotation_matrix, (w_can, h_can),
695
+ flags=cv2.INTER_NEAREST,
696
+ borderMode=cv2.BORDER_CONSTANT,
697
+ borderValue=0,
698
+ )
699
+
700
  print(f"Rotation applied: {angle_from_vertical:.1f}° CW, finger now vertical")
701
  else:
702
  print(f"Finger axis is {angle_from_vertical:.1f}° from vertical (within {rotation_threshold}° threshold, no rotation needed)")
 
760
  sobel_measurement = None
761
  sobel_failed = False
762
 
763
+ if edge_method in ["sobel", "mask", "auto", "compare"]:
764
  try:
765
+ # Pure Sobel mode drops the SAM mask; pure mask mode uses the SAM
766
+ # boundary directly without gradient snapping; auto/compare stay
767
+ # on the legacy hybrid path that combines both.
768
+ if edge_method == "sobel":
769
+ mask_mode = "sobel_only"
770
+ elif edge_method == "mask":
771
+ mask_mode = "mask_only"
772
+ else:
773
+ mask_mode = "hybrid"
774
+
775
+ print(f"Running edge refinement (mode={mask_mode}, threshold={sobel_threshold}, kernel={sobel_kernel_size})...")
776
+
777
  # Create debug directory for edge refinement if debug enabled
778
  edge_debug_dir = None
779
  if save_debug and result_png_path is not None:
780
  edge_debug_dir = str(Path(result_png_path).parent / "edge_refinement_debug")
781
+
782
+ # mask_only reads boundaries directly from the mask, so it needs
783
+ # the *raw* SAM silhouette. The hybrid/sobel_only paths keep the
784
+ # isolation-trimmed mask they were validated against.
785
+ if mask_mode == "mask_only" and raw_hand_mask is not None:
786
+ edge_mask_input = raw_hand_mask
787
+ else:
788
+ edge_mask_input = cleaned_mask
789
+
790
  sobel_measurement = refine_edges_sobel(
791
  image=image_canonical, # Use canonical orientation
792
  axis_data=axis_data,
 
796
  sobel_threshold=sobel_threshold,
797
  kernel_size=sobel_kernel_size,
798
  use_subpixel=use_subpixel,
799
+ finger_mask=edge_mask_input,
800
  debug_dir=edge_debug_dir,
801
+ mask_mode=mask_mode,
802
+ finger_name=finger_data.get("finger_name"),
803
  )
804
 
805
  sobel_width_cm = sobel_measurement["median_width_cm"]
806
+ print(f"Edge width: {sobel_width_cm:.4f}cm "
807
  f"({sobel_measurement['num_samples']} samples, "
808
  f"std={sobel_measurement['std_width_px']:.2f}px, "
809
  f"quality={sobel_measurement['edge_quality']['overall_score']:.3f})")
810
 
811
  except Exception as e:
812
+ print(f"Edge refinement failed: {e}")
813
  sobel_failed = True
814
+ if edge_method in ("sobel", "mask"):
 
815
  return create_output(
816
  card_detected=card_detected,
817
  finger_detected=True,
818
  scale_px_per_cm=px_per_cm,
819
  view_angle_ok=view_angle_ok,
820
  fail_reason="sobel_edge_refinement_failed",
821
+ edge_method_used=edge_method,
822
  )
823
 
824
  # Select measurement method based on edge_method flag
 
836
  median_width_cm = sobel_measurement["median_width_cm"]
837
  edge_method_used = "sobel"
838
 
839
+ elif edge_method == "mask":
840
+ # Use SAM-mask boundary directly (already handled failure case above)
841
+ final_measurement = sobel_measurement
842
+ median_width_cm = sobel_measurement["median_width_cm"]
843
+ edge_method_used = "mask"
844
+
845
  elif edge_method == "auto":
846
  # Automatic selection based on quality
847
  if sobel_measurement and not sobel_failed:
 
910
 
911
  # Calculate edge quality confidence (v1)
912
  edge_quality_conf = None
913
+ if edge_method_used in ["sobel", "mask", "compare"]:
914
  edge_quality_conf = compute_edge_quality_confidence(
915
  final_measurement.get("edge_quality")
916
  )
 
920
  card_conf,
921
  finger_conf,
922
  measurement_conf,
923
+ edge_method="sobel" if edge_method_used in ["sobel", "mask", "compare"] else "contour",
924
  edge_quality_confidence=edge_quality_conf,
925
  )
926
 
 
943
  print(f"Generating result visualization...")
944
 
945
  # Use comprehensive edge overlay (based on Sobel data) + card bounding box
946
+ if edge_method_used in ["sobel", "mask", "compare"] and sobel_measurement and not sobel_failed:
947
  edge_data = sobel_measurement["edge_data"]
948
  roi_bounds = sobel_measurement["roi_data"]["roi_bounds"]
949
  width_data = sobel_measurement["width_data"]
 
973
  # Fallback: plain image with axis/zone annotations when Sobel unavailable
974
  debug_image = image_canonical.copy()
975
 
976
+ # Tint SAM hand + card masks as underlays. Both masks live in the
977
+ # pre-precise-rotation canonical frame, so apply the same rotation
978
+ # matrix that was used to align the finger.
979
+ debug_image = _overlay_sam_masks(
980
+ debug_image,
981
+ hand_mask=hand_data.get("mask") if hand_data else None,
982
+ card_mask=card_result.get("mask") if card_result else None,
983
+ rotation_matrix=rotation_matrix,
984
+ )
985
+
986
+ # Draw the MediaPipe hand skeleton so reviewers can see the detected
987
+ # landmarks. hand_data landmarks are in the pre-precise-rotation
988
+ # canonical frame, so apply the same rotation_matrix here.
989
+ debug_image = _overlay_hand_skeleton(
990
+ debug_image,
991
+ landmarks=hand_data.get("landmarks") if hand_data else None,
992
+ rotation_matrix=rotation_matrix,
993
+ )
994
+
995
  # Draw card bounding box (transform corners if image was rotated)
996
  if card_result is not None and "corners" in card_result:
997
  corners = card_result["corners"]
 
1003
  cv2.polylines(debug_image, [pts], isClosed=True,
1004
  color=(0, 255, 0), thickness=3, lineType=cv2.LINE_AA)
1005
 
1006
+ # Save result image (downscaled + JPEG-encoded for speed)
1007
+ _save_debug_visualization(result_png_path, debug_image)
 
1008
  print(f"Result visualization saved to: {result_png_path}")
1009
 
1010
 
 
1033
  view_angle_ok: bool,
1034
  card_result: Optional[Dict[str, Any]],
1035
  scale_confidence: float,
1036
+ edge_method: str = "mask",
1037
  sobel_threshold: float = 15.0,
1038
  sobel_kernel_size: int = 3,
1039
  use_subpixel: bool = True,
 
1051
  )
1052
 
1053
  h_can, w_can = image_canonical.shape[:2]
1054
+ raw_hand_mask = hand_data.get("mask")
1055
  finger_data = isolate_finger(hand_data, finger=finger_name, image_shape=(h_can, w_can))
1056
 
1057
  if finger_data is None:
 
1103
  cleaned_mask, rotation_matrix, (w_can, h_can),
1104
  flags=cv2.INTER_NEAREST, borderMode=cv2.BORDER_CONSTANT, borderValue=0,
1105
  )
1106
+ if raw_hand_mask is not None:
1107
+ raw_hand_mask = cv2.warpAffine(
1108
+ raw_hand_mask, rotation_matrix, (w_can, h_can),
1109
+ flags=cv2.INTER_NEAREST, borderMode=cv2.BORDER_CONSTANT, borderValue=0,
1110
+ )
1111
 
1112
  # Ring zone
1113
  try:
 
1141
  # Sobel measurement
1142
  sobel_measurement = None
1143
  sobel_failed = False
1144
+ if edge_method in ["sobel", "mask", "auto", "compare"]:
1145
+ if edge_method == "sobel":
1146
+ mask_mode = "sobel_only"
1147
+ elif edge_method == "mask":
1148
+ mask_mode = "mask_only"
1149
+ else:
1150
+ mask_mode = "hybrid"
1151
  try:
1152
+ if mask_mode == "mask_only" and raw_hand_mask is not None:
1153
+ edge_mask_input = raw_hand_mask
1154
+ else:
1155
+ edge_mask_input = cleaned_mask
1156
  sobel_measurement = refine_edges_sobel(
1157
  image=img_work, axis_data=axis_data, zone_data=zone_data,
1158
  scale_px_per_cm=px_per_cm, finger_landmarks=finger_data.get("landmarks"),
1159
  sobel_threshold=sobel_threshold, kernel_size=sobel_kernel_size,
1160
  use_subpixel=use_subpixel,
1161
+ finger_mask=edge_mask_input,
1162
+ mask_mode=mask_mode,
1163
+ finger_name=finger_name,
1164
  )
1165
  except Exception:
1166
  sobel_failed = True
1167
+ if edge_method in ("sobel", "mask"):
1168
  return create_output(
1169
  card_detected=card_detected, finger_detected=True,
1170
  scale_px_per_cm=px_per_cm, view_angle_ok=view_angle_ok,
1171
+ fail_reason="sobel_edge_refinement_failed", edge_method_used=edge_method,
1172
  )
1173
 
1174
  # Select method
 
1180
  median_width_cm = sobel_measurement["median_width_cm"]
1181
  edge_method_used = "sobel"
1182
  final_measurement = sobel_measurement
1183
+ elif edge_method == "mask" and sobel_measurement:
1184
+ median_width_cm = sobel_measurement["median_width_cm"]
1185
+ edge_method_used = "mask"
1186
+ final_measurement = sobel_measurement
1187
  elif edge_method == "auto":
1188
  if sobel_measurement and not sobel_failed:
1189
  should_use, _ = should_use_sobel_measurement(sobel_measurement, contour_measurement)
 
1214
  finger_conf = compute_finger_confidence(hand_data, finger_data, mask_area, image_area)
1215
  measurement_conf = compute_measurement_confidence(final_measurement, median_width_cm)
1216
  edge_quality_conf = None
1217
+ if edge_method_used in ["sobel", "mask", "compare"]:
1218
  edge_quality_conf = compute_edge_quality_confidence(final_measurement.get("edge_quality"))
1219
  confidence_breakdown = compute_overall_confidence(
1220
  card_conf, finger_conf, measurement_conf,
1221
+ edge_method="sobel" if edge_method_used in ["sobel", "mask", "compare"] else "contour",
1222
  edge_quality_confidence=edge_quality_conf,
1223
  )
1224
 
 
1245
  confidence_threshold: float = 0.7,
1246
  result_png_path: Optional[str] = None,
1247
  save_debug: bool = False,
1248
+ edge_method: str = "mask",
1249
  sobel_threshold: float = 15.0,
1250
  sobel_kernel_size: int = 3,
1251
  use_subpixel: bool = True,
1252
  skip_card_detection: bool = False,
1253
  no_calibration: bool = False,
1254
+ card_method: str = "classic",
1255
+ hand_mask_method: str = "sam",
1256
  ring_model: str = DEFAULT_RING_MODEL,
1257
  ) -> Dict[str, Any]:
1258
  """Measure index, middle, and ring fingers from a single image.
 
1265
  """
1266
  from src.finger_segmentation import FINGER_LANDMARKS
1267
 
1268
+ # Phase 1: Image quality metrics (informational only — no hard fail)
1269
  quality = assess_image_quality(image)
1270
  print(f"[multi] Image quality: blur={quality['blur_score']:.1f}, "
1271
  f"brightness={quality['brightness']:.1f}, contrast={quality['contrast']:.1f}")
1272
  if not quality["passed"]:
1273
  for issue in quality["issues"]:
1274
+ print(f" Note: {issue}")
 
1275
 
1276
  # Lighting uniformity check
1277
  lighting = check_lighting_uniformity(image)
 
1283
  if save_debug and result_png_path is not None:
1284
  finger_debug_dir = str(Path(result_png_path).parent / "finger_segmentation_debug")
1285
 
1286
+ hand_data = segment_hand(
1287
+ image,
1288
+ finger="index",
1289
+ debug_dir=finger_debug_dir,
1290
+ use_sam_mask=(hand_mask_method == "sam"),
1291
+ )
1292
  if hand_data is None:
1293
  print("[multi] No hand detected")
1294
  return {"fail_reason": "hand_not_detected", "per_finger": {}, "fingers_measured": 0, "fingers_succeeded": 0}
 
1308
  view_angle_ok = True
1309
  card_detected = False
1310
  else:
1311
+ if card_method == "sam":
1312
+ card_result = _sam_card_detect(
1313
+ image_canonical, hand_data, save_debug, result_png_path
1314
+ )
1315
+ else:
1316
+ card_result = detect_credit_card(image_canonical, debug_dir=card_debug_dir)
1317
  if card_result is None:
1318
  return {"fail_reason": "card_not_detected", "per_finger": {}, "fingers_measured": 0, "fingers_succeeded": 0}
1319
  px_per_cm, scale_confidence = compute_scale_factor(card_result["corners"])
 
1391
  card_result=card_result,
1392
  px_per_cm=px_per_cm,
1393
  result_png_path=result_png_path,
1394
+ hand_mask=hand_data.get("mask") if hand_data else None,
1395
+ hand_landmarks=hand_data.get("landmarks") if hand_data else None,
1396
  )
1397
 
1398
  # Clean internal data from output
 
1416
  card_result: Optional[Dict[str, Any]],
1417
  px_per_cm: float,
1418
  result_png_path: str,
1419
+ hand_mask: Optional[np.ndarray] = None,
1420
+ hand_landmarks: Optional[np.ndarray] = None,
1421
  ) -> None:
1422
  """Generate debug visualization for multi-finger measurement.
1423
 
 
1436
  vis = image_canonical.copy()
1437
  h, w = vis.shape[:2]
1438
 
1439
+ # SAM silhouettes (hand + card) as tinted underlays
1440
+ vis = _overlay_sam_masks(
1441
+ vis,
1442
+ hand_mask=hand_mask,
1443
+ card_mask=(card_result.get("mask") if card_result else None),
1444
+ )
1445
+
1446
+ # MediaPipe hand skeleton (canonical frame — no rotation needed since the
1447
+ # multi-finger viz composes per-finger overlays via inverse rotation).
1448
+ vis = _overlay_hand_skeleton(vis, landmarks=hand_landmarks)
1449
+
1450
+ # Draw card bounding box / dimensions on top of the tinted card mask
1451
  if card_result is not None:
1452
  vis = draw_card_overlay(vis, card_result, px_per_cm)
1453
 
 
1520
  Color.GREEN, 1, cv2.LINE_AA)
1521
  count += 1
1522
 
1523
+ _save_debug_visualization(result_png_path, vis)
 
1524
  print(f"\n[multi] Debug visualization saved to: {result_png_path}")
1525
 
1526
 
 
1558
  use_subpixel=not args.no_subpixel,
1559
  skip_card_detection=args.skip_card_detection,
1560
  no_calibration=args.no_calibration,
1561
+ card_method=args.card_method,
1562
+ hand_mask_method=args.hand_mask,
1563
  ring_model=args.ring_model,
1564
  )
1565
 
 
1595
  sobel_kernel_size=args.sobel_kernel_size,
1596
  use_subpixel=not args.no_subpixel,
1597
  skip_card_detection=args.skip_card_detection,
1598
+ card_method=args.card_method,
1599
+ hand_mask_method=args.hand_mask,
1600
  ring_model=args.ring_model,
1601
  )
1602
 
requirements.txt CHANGED
@@ -7,3 +7,8 @@ flask>=3.0.0
7
  gunicorn>=21.2.0
8
  openai>=1.0.0
9
  supabase>=2.0.0
 
 
 
 
 
 
7
  gunicorn>=21.2.0
8
  openai>=1.0.0
9
  supabase>=2.0.0
10
+ # SAM 2.1 via HuggingFace transformers (card segmentation)
11
+ torch>=2.4.0
12
+ torchvision>=0.19.0
13
+ transformers>=4.47.0
14
+ pillow>=10.0.0
script/compare_hand_sam.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Compare hand-mask quality across backends on a single image.
2
+
3
+ Runs MediaPipe (current pipeline), SAM 2.1 tiny, and SAM 2.1 small using
4
+ a point prompt at the palm center from MediaPipe landmarks. Saves a 4-panel
5
+ side-by-side comparison and also writes each mask's contour + edge crop.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import sys
10
+ import time
11
+ from pathlib import Path
12
+ from typing import Tuple
13
+
14
+ import cv2
15
+ import numpy as np
16
+ from PIL import Image as PILImage
17
+
18
+ sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
19
+
20
+ from src.finger_segmentation import segment_hand # noqa: E402
21
+
22
+ IMG_PATH = Path("input/sample-04-12/card_2.jpg")
23
+ OUT_DIR = Path("output/hand_sam_compare")
24
+
25
+ SAM_MODELS = [
26
+ ("sam2.1-tiny", "facebook/sam2.1-hiera-tiny"),
27
+ ("sam2.1-small", "facebook/sam2.1-hiera-small"),
28
+ ]
29
+
30
+
31
def palm_and_card_points(image_bgr: np.ndarray, hand_data: dict) -> Tuple[Tuple[int, int], Tuple[int, int]]:
    """Return (palm_center, card_center) pixel coords in the canonical image space.

    Palm center = mean of wrist + MCPs (landmarks 0, 5, 9, 13, 17).
    Card center = a rough point to the left of the hand (negative prompt hint),
    clamped so it always lies inside the image.

    Args:
        image_bgr: Image the prompts refer to; only its height/width are read.
        hand_data: Dict with a ``"landmarks"`` entry, array-like whose first
            two columns are used as x, y pixel coordinates.

    Returns:
        ``((palm_x, palm_y), (card_x, card_y))`` as plain Python ints.

    Raises:
        RuntimeError: If ``hand_data`` carries no landmarks.
    """
    landmarks = hand_data.get("landmarks")
    if landmarks is None:
        raise RuntimeError("MediaPipe returned no landmarks")

    # Keep only x, y; any extra column (e.g. depth) is ignored.
    lm = np.asarray(landmarks)[:, :2]
    palm_ids = [0, 5, 9, 13, 17]
    palm_center = tuple(np.round(lm[palm_ids].mean(axis=0)).astype(int).tolist())

    # Card hint: 150 px left of the hand's leftmost landmark, at mid height.
    # The floor of 50 keeps it off the image border; the upper clamp keeps the
    # point inside very narrow images, where the floor alone would overshoot.
    h, w = image_bgr.shape[:2]
    hand_x_min = int(lm[:, 0].min())
    card_x = min(max(50, hand_x_min - 150), max(w - 1, 0))
    card_y = h // 2
    return palm_center, (card_x, card_y)
52
+
53
+
54
def run_sam(
    model_id: str,
    image_rgb: np.ndarray,
    palm_xy: Tuple[int, int],
    negative_xy: Tuple[int, int],
) -> Tuple[np.ndarray, float, float]:
    """Segment the hand with SAM 2.1 using one positive and one negative point.

    The palm point is passed with label 1 and the card hint with label 0. Of
    the multi-mask candidates the model returns, the one with the highest
    predicted IoU score is kept.

    Returns:
        ``(mask, score, seconds)``: boolean mask, the winning IoU score, and
        the elapsed time. The timer starts after the processor and model are
        loaded, so load time is not included.
    """
    import torch
    from transformers import Sam2Model, Sam2Processor

    sam_processor = Sam2Processor.from_pretrained(model_id)
    sam_model = Sam2Model.from_pretrained(model_id).to("cpu").eval()

    pil_image = PILImage.fromarray(image_rgb)
    # Nesting: image -> object -> points (and the matching labels).
    prompt_points = [[[list(palm_xy), list(negative_xy)]]]
    prompt_labels = [[[1, 0]]]

    started = time.time()
    model_inputs = sam_processor(
        images=pil_image,
        input_points=prompt_points,
        input_labels=prompt_labels,
        return_tensors="pt",
    )
    with torch.inference_mode():
        outputs = sam_model(**model_inputs, multimask_output=True)

    per_image_masks = sam_processor.post_process_masks(
        outputs.pred_masks.cpu(),
        model_inputs["original_sizes"],
        mask_threshold=0.0,
    )
    # (num_candidates, H, W) for first image, first prompt set
    candidate_masks = per_image_masks[0][0]
    candidate_scores = outputs.iou_scores.cpu().numpy()[0, 0]

    winner = int(np.argmax(candidate_scores))
    best_mask = candidate_masks[winner].numpy().astype(bool)
    best_score = float(candidate_scores[winner])
    return best_mask, best_score, time.time() - started
91
+
92
+
93
def mask_to_overlay(image_bgr: np.ndarray, mask: np.ndarray, color: Tuple[int, int, int]) -> np.ndarray:
    """Return a BGR copy of the image with the mask tinted and outlined.

    The input image is not modified. Masked pixels get ``color`` added at 35%
    weight, and the mask's external contours are drawn in the same colour.
    """
    # Solid colour layer that is non-zero only where the mask is set.
    color_layer = np.zeros_like(image_bgr)
    color_layer[mask] = color
    # addWeighted writes to a fresh array, so image_bgr stays untouched.
    blended = cv2.addWeighted(image_bgr, 1.0, color_layer, 0.35, 0)

    binary = mask.astype(np.uint8) * 255
    outlines, _hierarchy = cv2.findContours(
        binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
    )
    cv2.drawContours(blended, outlines, -1, color, 2, cv2.LINE_AA)
    return blended
105
+
106
+
107
def label_panel(img: np.ndarray, text: str) -> np.ndarray:
    """Draw a black title bar with white ``text`` across the top of ``img``.

    The image is annotated in place and also returned for chaining.
    """
    panel_width = img.shape[1]
    # Filled 60 px banner so the caption stays readable on any background.
    cv2.rectangle(img, (0, 0), (panel_width, 60), (0, 0, 0), -1)
    cv2.putText(
        img, text, (20, 42), cv2.FONT_HERSHEY_SIMPLEX, 1.3,
        (255, 255, 255), 3, cv2.LINE_AA,
    )
    return img
113
+
114
+
115
def main() -> int:
    """Run every hand-mask backend on ``IMG_PATH`` and write comparison images.

    Steps: load the image, run ``segment_hand`` as the MediaPipe baseline,
    prompt each model in ``SAM_MODELS`` with the palm / card points, then save
    per-backend panels plus a full and a zoomed side-by-side under ``OUT_DIR``.

    A SAM backend that raises is reported and skipped. Each panel file is
    named after the backend that actually produced it, so a skipped backend
    cannot shift the names of the panels that follow.

    Returns:
        Process exit code: 0 on success, 1 if the image cannot be loaded or
        the baseline yields no hand / no mask.
    """
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    image_bgr = cv2.imread(str(IMG_PATH))
    if image_bgr is None:
        print(f"Failed to load {IMG_PATH}")
        return 1

    print(f"Image: {IMG_PATH} {image_bgr.shape}")

    # --- MediaPipe baseline ---
    t0 = time.time()
    hand_data = segment_hand(image_bgr, finger="index")
    mp_time = time.time() - t0
    if hand_data is None:
        print("MediaPipe detected no hand — aborting")
        return 1

    canonical_image = hand_data.get("canonical_image", image_bgr)
    mp_mask = hand_data.get("mask")
    if mp_mask is None:
        print("MediaPipe did not return a hand mask")
        return 1
    mp_mask = mp_mask.astype(bool)
    print(f"MediaPipe: {mp_time:.1f}s mask_area={mp_mask.sum()}")

    # Work in the canonical image so the comparison is apples-to-apples
    image_for_sam = canonical_image.copy()
    palm_xy, card_xy = palm_and_card_points(image_for_sam, hand_data)
    print(f"Palm prompt: {palm_xy} Negative hint: {card_xy}")

    image_rgb = cv2.cvtColor(image_for_sam, cv2.COLOR_BGR2RGB)

    # --- SAM models ---
    results = {"mediapipe": (mp_mask, None, mp_time)}
    for name, model_id in SAM_MODELS:
        print(f"\n=== {name} ({model_id}) ===")
        try:
            mask, score, seconds = run_sam(model_id, image_rgb, palm_xy, card_xy)
            # Align shape (should already be canonical)
            if mask.shape != mp_mask.shape:
                mask = cv2.resize(
                    mask.astype(np.uint8),
                    (mp_mask.shape[1], mp_mask.shape[0]),
                    interpolation=cv2.INTER_NEAREST,
                ).astype(bool)
            print(f" score={score:.3f} time={seconds:.1f}s area={mask.sum()}")
            results[name] = (mask, score, seconds)
        except Exception as e:
            # Best effort: one failing backend must not abort the comparison.
            print(f" FAILED: {e!r}")
            import traceback
            traceback.print_exc()

    # --- Render panels ---
    colors = {
        "mediapipe": (0, 165, 255),  # orange
        "sam2.1-tiny": (0, 255, 255),  # yellow
        "sam2.1-small": (0, 255, 0),  # green
    }
    # (result key, short name used in the panel filename), in display order.
    backends = [
        ("mediapipe", "mediapipe"),
        ("sam2.1-tiny", "tiny"),
        ("sam2.1-small", "small"),
    ]

    # Panel 0: original with prompt points
    orig = image_for_sam.copy()
    cv2.circle(orig, palm_xy, 18, (0, 255, 0), -1)
    cv2.circle(orig, palm_xy, 18, (0, 0, 0), 3)
    cv2.circle(orig, card_xy, 18, (0, 0, 255), -1)
    cv2.circle(orig, card_xy, 18, (0, 0, 0), 3)
    # Each entry keeps its own short name so filenames never rely on position.
    named_panels = [("orig", label_panel(orig, "original + prompts"))]

    for name, short_name in backends:
        if name not in results:
            continue
        mask, score, seconds = results[name]
        panel = mask_to_overlay(image_for_sam, mask, colors[name])
        label = f"{name} {seconds:.1f}s"
        if score is not None:
            label += f" score={score:.2f}"
        named_panels.append((short_name, label_panel(panel, label)))

    # Save individual panels full-res
    for i, (short_name, p) in enumerate(named_panels):
        cv2.imwrite(str(OUT_DIR / f"panel_{i}_{short_name}.png"), p)

    panels = [p for _, p in named_panels]

    # Build a single side-by-side at a readable size
    def resize_to_height(img: np.ndarray, H: int) -> np.ndarray:
        h, w = img.shape[:2]
        scale = H / h
        return cv2.resize(img, (int(round(w * scale)), H), interpolation=cv2.INTER_AREA)

    target_h = 900
    resized = [resize_to_height(p, target_h) for p in panels]
    combined = np.hstack(resized)
    cv2.imwrite(str(OUT_DIR / "comparison_full.png"), combined)

    # Also zoom-crop around the hand for fine-detail inspection
    ys, xs = np.where(mp_mask)
    if len(xs) > 0:
        pad = 80
        x0, x1 = max(0, xs.min() - pad), min(image_for_sam.shape[1], xs.max() + pad)
        y0, y1 = max(0, ys.min() - pad), min(image_for_sam.shape[0], ys.max() + pad)
        crops = [resize_to_height(p[y0:y1, x0:x1], target_h) for p in panels]
        combined_zoom = np.hstack(crops)
        cv2.imwrite(str(OUT_DIR / "comparison_zoom.png"), combined_zoom)

    print(f"\nSaved panels to {OUT_DIR}/")
    return 0
224
+
225
+
226
+ if __name__ == "__main__":
227
+ raise SystemExit(main())
script/validate_sam_card.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Validate SAM card detection (classic vs AMG vs prompt) on sample-04-12.
2
+
3
+ Prompt-based SAM depends on MediaPipe running first to provide a hand mask
4
+ for seed derivation, so we run `segment_hand()` on each image before timing
5
+ the three detectors.
6
+
7
+ Outputs per-image rows and a summary with success counts + mean wall time.
8
+ Debug overlays saved under `output/sam_val/<stem>/`.
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import os
13
+ import sys
14
+ import time
15
+ import traceback
16
+ from pathlib import Path
17
+
18
+ SKIP_AMG = bool(os.environ.get("SKIP_AMG"))
19
+
20
+ import cv2
21
+ import numpy as np
22
+
23
+ sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
24
+
25
+ from src.card_detection import compute_scale_factor, detect_credit_card # noqa: E402
26
+ from src.finger_segmentation import segment_hand # noqa: E402
27
+ from src.sam_card_detection import ( # noqa: E402
28
+ detect_credit_card_sam,
29
+ detect_credit_card_sam_prompt,
30
+ suggest_card_seeds,
31
+ )
32
+
33
+ SAMPLE_DIR = Path("input/sample-04-12")
34
+ OUT_DIR = Path("output/sam_val")
35
+
36
+
37
+ def _negatives_from_landmarks(landmarks: np.ndarray):
38
+ palm_idx = [0, 5, 9, 13, 17]
39
+ c = np.mean(landmarks[palm_idx, :2], axis=0)
40
+ return [(int(round(c[0])), int(round(c[1])))]
41
+
42
+
43
+ def run_one(img_path: Path) -> dict:
44
+ bgr = cv2.imread(str(img_path))
45
+ if bgr is None:
46
+ return {"file": img_path.name, "error": "load_failed"}
47
+
48
+ rec = {"file": img_path.name, "shape": bgr.shape[:2]}
49
+
50
+ # --- MediaPipe + SAM hand (needed for prompt-SAM seeds) ---
51
+ t0 = time.time()
52
+ try:
53
+ hand_data = segment_hand(bgr, finger="index", use_sam_mask=True)
54
+ except Exception as e:
55
+ hand_data = None
56
+ rec["hand_error"] = repr(e)[:120]
57
+ rec["hand_time_s"] = round(time.time() - t0, 2)
58
+
59
+ if hand_data is None:
60
+ rec["hand_detected"] = False
61
+ canonical = bgr
62
+ else:
63
+ rec["hand_detected"] = True
64
+ canonical = hand_data.get("canonical_image", bgr)
65
+
66
+ # --- Classic ---
67
+ t0 = time.time()
68
+ try:
69
+ classic = detect_credit_card(canonical)
70
+ if classic is not None:
71
+ px_cm, _ = compute_scale_factor(classic["corners"])
72
+ rec["classic_px_per_cm"] = px_cm
73
+ else:
74
+ rec["classic_px_per_cm"] = None
75
+ except Exception as e:
76
+ rec["classic_error"] = repr(e)[:120]
77
+ rec["classic_time_s"] = round(time.time() - t0, 2)
78
+
79
+ # --- SAM AMG ---
80
+ rec["amg_px_per_cm"] = None
81
+ rec["amg_time_s"] = None
82
+ if not SKIP_AMG:
83
+ amg_debug = OUT_DIR / img_path.stem / "sam_card_amg"
84
+ t0 = time.time()
85
+ try:
86
+ amg = detect_credit_card_sam(canonical, debug_dir=str(amg_debug))
87
+ if amg is not None:
88
+ px_cm, _ = compute_scale_factor(amg["corners"])
89
+ rec["amg_px_per_cm"] = px_cm
90
+ except Exception as e:
91
+ rec["amg_error"] = repr(e)[:120]
92
+ traceback.print_exc()
93
+ rec["amg_time_s"] = round(time.time() - t0, 2)
94
+
95
+ # --- SAM prompt ---
96
+ rec["prompt_px_per_cm"] = None
97
+ rec["prompt_time_s"] = None
98
+ if hand_data is not None:
99
+ prompt_debug = OUT_DIR / img_path.stem / "sam_card_prompt"
100
+ seeds = suggest_card_seeds(hand_data["mask"], canonical.shape[:2])
101
+ rec["prompt_n_seeds"] = len(seeds)
102
+ negs = _negatives_from_landmarks(hand_data["landmarks"])
103
+ t0 = time.time()
104
+ try:
105
+ pr = detect_credit_card_sam_prompt(
106
+ canonical,
107
+ seed_points=seeds,
108
+ negative_points=negs,
109
+ debug_dir=str(prompt_debug),
110
+ )
111
+ if pr is not None:
112
+ px_cm, _ = compute_scale_factor(pr["corners"])
113
+ rec["prompt_px_per_cm"] = px_cm
114
+ except Exception as e:
115
+ rec["prompt_error"] = repr(e)[:120]
116
+ traceback.print_exc()
117
+ rec["prompt_time_s"] = round(time.time() - t0, 2)
118
+
119
+ return rec
120
+
121
+
122
+ def main() -> int:
123
+ OUT_DIR.mkdir(parents=True, exist_ok=True)
124
+ images = sorted(SAMPLE_DIR.glob("*.jpg"))
125
+ if not images:
126
+ print(f"No images found in {SAMPLE_DIR}")
127
+ return 1
128
+
129
+ print(f"Validating {len(images)} images from {SAMPLE_DIR}\n")
130
+ results = []
131
+ for img in images:
132
+ print(f"=== {img.name} ===")
133
+ rec = run_one(img)
134
+ results.append(rec)
135
+ print(rec)
136
+ print()
137
+
138
+ # --- Summary table ---
139
+ print("\n===== SUMMARY =====")
140
+ header = (
141
+ f"{'file':<18}"
142
+ f"{'classic':>10}{'classicT':>10}"
143
+ f"{'amg':>10}{'amgT':>8}"
144
+ f"{'prompt':>10}{'promptT':>10}"
145
+ )
146
+ print(header)
147
+ print("-" * len(header))
148
+
149
+ counts = {"classic": 0, "amg": 0, "prompt": 0}
150
+ times = {"classic": [], "amg": [], "prompt": []}
151
+
152
+ for r in results:
153
+ def _fmt(v, fmt="{:.2f}"):
154
+ return fmt.format(v) if v is not None else "FAIL"
155
+ c = r.get("classic_px_per_cm")
156
+ a = r.get("amg_px_per_cm")
157
+ p = r.get("prompt_px_per_cm")
158
+ ct = r.get("classic_time_s")
159
+ at = r.get("amg_time_s")
160
+ pt = r.get("prompt_time_s")
161
+ print(
162
+ f"{r['file']:<18}"
163
+ f"{_fmt(c):>10}{_fmt(ct):>10}"
164
+ f"{_fmt(a):>10}{_fmt(at):>8}"
165
+ f"{_fmt(p):>10}{_fmt(pt):>10}"
166
+ )
167
+ if c is not None:
168
+ counts["classic"] += 1
169
+ times["classic"].append(ct)
170
+ if a is not None:
171
+ counts["amg"] += 1
172
+ times["amg"].append(at)
173
+ if p is not None:
174
+ counts["prompt"] += 1
175
+ times["prompt"].append(pt)
176
+
177
+ n = len(results)
178
+ print("-" * len(header))
179
+ for k in ("classic", "amg", "prompt"):
180
+ ok = counts[k]
181
+ mean_t = (sum(times[k]) / len(times[k])) if times[k] else float("nan")
182
+ print(f"{k:<8} success: {ok}/{n} mean_time_s: {mean_t:.2f}")
183
+
184
+ # Agreement check: when both prompt and amg succeeded, how close are scales?
185
+ agree = []
186
+ for r in results:
187
+ a, p = r.get("amg_px_per_cm"), r.get("prompt_px_per_cm")
188
+ if a is not None and p is not None:
189
+ agree.append(100 * abs(a - p) / max(a, p))
190
+ if agree:
191
+ print(f"\nprompt vs amg scale agreement: mean diff {np.mean(agree):.2f}%, "
192
+ f"max {max(agree):.2f}% (n={len(agree)})")
193
+
194
+ return 0
195
+
196
+
197
+ if __name__ == "__main__":
198
+ raise SystemExit(main())
src/edge_refinement.py CHANGED
@@ -93,6 +93,7 @@ def _find_edges_from_axis(
93
  row_mask: Optional[np.ndarray] = None,
94
  row_gradient_left_to_right: Optional[np.ndarray] = None,
95
  row_gradient_right_to_left: Optional[np.ndarray] = None,
 
96
  ) -> Optional[Tuple[float, float, float, float]]:
97
  """
98
  Find left and right edges by expanding from axis position.
@@ -131,72 +132,90 @@ def _find_edges_from_axis(
131
  left_search_gradient = row_gradient_right_to_left if row_gradient_right_to_left is not None else row_gradient
132
  right_search_gradient = row_gradient_left_to_right if row_gradient_left_to_right is not None else row_gradient
133
 
 
 
 
 
 
 
 
 
134
  # MASK-CONSTRAINED MODE (preferred when available)
135
  if row_mask is not None and np.any(row_mask):
136
- # Strategy: Search FROM axis OUTWARD, constrained by mask
137
- # This avoids picking background edges while using gradient precision
 
 
 
 
138
 
139
  mask_indices = np.where(row_mask)[0]
140
  if len(mask_indices) < 2:
141
  return None # Mask too small
142
 
143
- left_mask_boundary = mask_indices[0]
144
- right_mask_boundary = mask_indices[-1]
145
-
146
- # Search LEFT from axis, stopping at mask boundary
147
- left_edge_x = None
148
- left_strength = 0
149
-
150
- # Start from axis, go left until we reach left mask boundary
151
- search_start = max(left_mask_boundary, int(axis_x))
152
- for x in range(search_start, left_mask_boundary - 1, -1):
153
- if x < 0 or x >= len(row_gradient):
154
- continue
155
- if left_search_gradient[x] > threshold:
156
- # Found a strong edge - update if stronger than previous
157
- if left_search_gradient[x] > left_strength:
158
- left_edge_x = x
159
- left_strength = left_search_gradient[x]
160
-
161
- # If no edge found with full threshold, try with relaxed threshold
162
- if left_edge_x is None:
163
- relaxed_threshold = threshold * 0.5
164
- for x in range(search_start, left_mask_boundary - 1, -1):
165
- if x < 0 or x >= len(row_gradient):
166
- continue
167
- if left_search_gradient[x] > relaxed_threshold:
168
- if left_search_gradient[x] > left_strength:
169
- left_edge_x = x
170
- left_strength = left_search_gradient[x]
171
-
172
- # Search RIGHT from axis, stopping at mask boundary
173
- right_edge_x = None
174
- right_strength = 0
175
 
176
- # Start from axis, go right until we reach right mask boundary
177
- search_start = min(right_mask_boundary, int(axis_x))
178
- for x in range(search_start, right_mask_boundary + 1):
179
- if x < 0 or x >= len(row_gradient):
180
- continue
181
- if right_search_gradient[x] > threshold:
182
- # Found a strong edge - update if stronger than previous
183
- if right_search_gradient[x] > right_strength:
184
- right_edge_x = x
185
- right_strength = right_search_gradient[x]
186
-
187
- # If no edge found with full threshold, try with relaxed threshold
188
- if right_edge_x is None:
189
- relaxed_threshold = threshold * 0.5
190
- for x in range(search_start, right_mask_boundary + 1):
191
- if x < 0 or x >= len(row_gradient):
192
- continue
193
- if right_search_gradient[x] > relaxed_threshold:
194
- if right_search_gradient[x] > right_strength:
195
- right_edge_x = x
196
- right_strength = right_search_gradient[x]
197
 
198
- if left_edge_x is None or right_edge_x is None:
199
- return None # No valid edges found
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
 
201
  else:
202
  # AXIS-EXPANSION MODE (fallback when no mask)
@@ -240,7 +259,9 @@ def extract_ring_zone_roi(
240
  image: np.ndarray,
241
  axis_data: Dict[str, Any],
242
  zone_data: Dict[str, Any],
243
- rotate_align: bool = False
 
 
244
  ) -> Dict[str, Any]:
245
  """
246
  Extract ROI around ring zone.
@@ -274,7 +295,14 @@ def extract_ring_zone_roi(
274
  zone_length = zone_data["length"]
275
  center = zone_data["center_point"]
276
  direction = axis_data["direction"]
277
- half_height = zone_length * 0.25 # 0.5x / 2
 
 
 
 
 
 
 
278
  half_width = zone_length * 0.6 # 1.5x / 2
279
 
280
  x_min = int(np.clip(center[0] - half_width, 0, w - 1))
@@ -294,8 +322,22 @@ def extract_ring_zone_roi(
294
  # Convert to grayscale for edge detection
295
  roi_gray = cv2.cvtColor(roi_bgr, cv2.COLOR_BGR2GRAY)
296
 
297
- # Full ROI mask — the ROI rectangle itself is the search constraint
298
- roi_mask = np.ones((roi_height, roi_width), dtype=np.uint8) * 255
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
 
300
  # Create transform matrix (ROI coords -> original coords)
301
  # Simple translation for non-rotated case
@@ -473,7 +515,8 @@ def detect_edges_per_row(
473
  roi_data: Dict[str, Any],
474
  threshold: float = DEFAULT_GRADIENT_THRESHOLD,
475
  expected_width_px: Optional[float] = None,
476
- scale_px_per_cm: Optional[float] = None
 
477
  ) -> Dict[str, Any]:
478
  """
479
  Detect left and right finger edges for each row (cross-section).
@@ -535,7 +578,12 @@ def detect_edges_per_row(
535
 
536
  # Get finger mask for constrained edge detection (if available)
537
  roi_mask = roi_data.get("roi_mask")
538
- mode_used = "mask_constrained" if roi_mask is not None else "axis_expansion"
 
 
 
 
 
539
 
540
  if roi_mask is not None:
541
  logger.debug(f"Using MASK-CONSTRAINED edge detection (mask shape: {roi_mask.shape})")
@@ -568,7 +616,8 @@ def detect_edges_per_row(
568
  result = _find_edges_from_axis(row_gradient, row, axis_x, threshold,
569
  min_width_px, max_width_px, row_mask,
570
  row_gradient_left_to_right=row_gradient_l2r,
571
- row_gradient_right_to_left=row_gradient_r2l)
 
572
 
573
  if result is None:
574
  continue # No valid edges found
@@ -958,56 +1007,73 @@ def should_use_sobel_measurement(
958
  """
959
  Decide whether to use Sobel measurement or fall back to contour.
960
 
961
- Decision criteria:
 
 
 
 
 
 
 
962
  1. Edge quality score > min_quality_score (default 0.7)
963
  2. Edge consistency > min_consistency (default 0.5 = 50%)
964
- 3. If contour available: Sobel and contour agree within max_difference_pct
 
 
 
 
 
965
 
966
  Args:
967
  sobel_result: Output from refine_edges_sobel()
968
  contour_result: Optional output from compute_cross_section_width()
969
- min_quality_score: Minimum acceptable quality score
970
- min_consistency: Minimum edge detection success rate
971
  max_difference_pct: Maximum allowed difference from contour (%)
972
 
973
  Returns:
974
  Tuple of (should_use_sobel, reason)
975
  """
976
- # Check if edge quality data available
977
  if "edge_quality" not in sobel_result:
978
  return False, "edge_quality_data_missing"
979
 
980
  edge_quality = sobel_result["edge_quality"]
 
 
981
 
982
- # Check 1: Overall quality score
983
- if edge_quality["overall_score"] < min_quality_score:
984
- return False, f"quality_score_low_{edge_quality['overall_score']:.2f}"
985
-
986
- # Check 2: Consistency (success rate)
987
- if edge_quality["consistency_score"] < min_consistency:
988
- return False, f"consistency_low_{edge_quality['consistency_score']:.2f}"
989
-
990
- # Check 3: Measurement reasonableness
991
  sobel_width = sobel_result.get("median_width_cm")
992
  if sobel_width is None or sobel_width <= 0:
993
  return False, "invalid_measurement"
994
-
995
- # Typical finger width range
996
  if sobel_width < MIN_REALISTIC_WIDTH_CM or sobel_width > MAX_REALISTIC_WIDTH_CM:
997
  return False, f"unrealistic_width_{sobel_width:.2f}cm"
998
 
999
- # Check 4: Agreement with contour (if available)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1000
  if contour_result is not None:
1001
  contour_width = contour_result.get("median_width_px")
1002
  sobel_width_px = sobel_result.get("median_width_px")
1003
-
1004
  if contour_width and sobel_width_px:
1005
  diff_pct = abs(sobel_width_px - contour_width) / contour_width * 100
1006
-
1007
  if diff_pct > max_difference_pct:
1008
  return False, f"disagrees_with_contour_{diff_pct:.1f}pct"
1009
 
1010
- # All checks passed
1011
  return True, "quality_acceptable"
1012
 
1013
 
@@ -1023,6 +1089,9 @@ def refine_edges_sobel(
1023
  use_subpixel: bool = True,
1024
  expected_width_px: Optional[float] = None,
1025
  debug_dir: Optional[str] = None,
 
 
 
1026
  ) -> Dict[str, Any]:
1027
  """
1028
  Main entry point for Sobel-based edge refinement.
@@ -1079,10 +1148,15 @@ def refine_edges_sobel(
1079
  # A.2: Ring zone + ROI bounds (need to extract bounds first)
1080
  # We'll save this after ROI extraction
1081
 
1082
- # Step 1: Extract ROI
 
 
 
1083
  roi_data = extract_ring_zone_roi(
1084
  image, axis_data, zone_data,
1085
- rotate_align=rotate_align
 
 
1086
  )
1087
 
1088
  logger.debug(f"ROI size: {roi_data['roi_width']}x{roi_data['roi_height']}px")
@@ -1117,12 +1191,14 @@ def refine_edges_sobel(
1117
  grad_mag = draw_gradient_visualization(gradient_data["gradient_magnitude"], cv2.COLORMAP_HOT)
1118
  observer.save_stage("06_gradient_magnitude", grad_mag)
1119
 
1120
- # Step 3: Detect edges per row
 
1121
  edge_data = detect_edges_per_row(
1122
  gradient_data, roi_data,
1123
  threshold=sobel_threshold,
1124
  expected_width_px=expected_width_px,
1125
- scale_px_per_cm=scale_px_per_cm
 
1126
  )
1127
 
1128
  logger.debug(f"Valid rows: {edge_data['num_valid_rows']}/{len(edge_data['valid_rows'])} ({edge_data['num_valid_rows']/len(edge_data['valid_rows'])*100:.1f}%)")
@@ -1151,11 +1227,13 @@ def refine_edges_sobel(
1151
  # B.5: Selected edges (final detected edges)
1152
  observer.draw_and_save("09_selected_edges", roi_data["roi_image"], draw_selected_edges, edge_data)
1153
 
1154
- # Step 4: Measure width from edges (with sub-pixel refinement)
 
 
1155
  width_data = measure_width_from_edges(
1156
  edge_data, roi_data, scale_px_per_cm,
1157
  gradient_data=gradient_data,
1158
- use_subpixel=use_subpixel
1159
  )
1160
 
1161
  if debug_dir:
 
93
  row_mask: Optional[np.ndarray] = None,
94
  row_gradient_left_to_right: Optional[np.ndarray] = None,
95
  row_gradient_right_to_left: Optional[np.ndarray] = None,
96
+ mask_only: bool = False,
97
  ) -> Optional[Tuple[float, float, float, float]]:
98
  """
99
  Find left and right edges by expanding from axis position.
 
132
  left_search_gradient = row_gradient_right_to_left if row_gradient_right_to_left is not None else row_gradient
133
  right_search_gradient = row_gradient_left_to_right if row_gradient_left_to_right is not None else row_gradient
134
 
135
+ # In mask_only mode a row with no mask pixels (or a mask that doesn't
136
+ # contain the finger axis) must be dropped. Otherwise we would fall
137
+ # through to the gradient axis-expansion path below, which routinely
138
+ # returns ROI-edge coordinates as "edges" on empty rows and poisons the
139
+ # width median.
140
+ if mask_only and (row_mask is None or not np.any(row_mask)):
141
+ return None
142
+
143
  # MASK-CONSTRAINED MODE (preferred when available)
144
  if row_mask is not None and np.any(row_mask):
145
+ # Strategy: the SAM mask already knows where the finger boundary is
146
+ # to pixel accuracy. We anchor to the mask boundary by default, and
147
+ # only snap to a nearby gradient peak when one exceeds the threshold
148
+ # (for sub-pixel refinement). When contrast is weak (pale finger on
149
+ # light background) the gradient search yields nothing, so trusting
150
+ # the mask directly is what prevents "no valid widths" failures.
151
 
152
  mask_indices = np.where(row_mask)[0]
153
  if len(mask_indices) < 2:
154
  return None # Mask too small
155
 
156
+ # Pick the contiguous run of mask pixels that contains the finger
157
+ # axis. This matters when the raw SAM hand mask is passed in (mask_only
158
+ # path): at the ring-zone rows the mask may include adjacent fingers,
159
+ # and np.where(...)[0][0]/[-1] would then span across fingers.
160
+ axis_col = int(round(axis_x))
161
+ axis_col = max(0, min(len(row_mask) - 1, axis_col))
162
+ if not row_mask[axis_col]:
163
+ # Axis is off the mask on this row — the ROI is clipping into
164
+ # background (e.g. ring/pinky ROI reaching the MCP webbing).
165
+ # Treat the row as invalid rather than snapping to whatever mask
166
+ # run happens to be nearest; otherwise the wrong run can pull
167
+ # the median width up.
168
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
+ # Expand left/right from the axis until we leave the mask run.
171
+ left_mask_boundary = axis_col
172
+ while left_mask_boundary > 0 and row_mask[left_mask_boundary - 1]:
173
+ left_mask_boundary -= 1
174
+ right_mask_boundary = axis_col
175
+ max_col = len(row_mask) - 1
176
+ while right_mask_boundary < max_col and row_mask[right_mask_boundary + 1]:
177
+ right_mask_boundary += 1
178
+
179
+ # If the contiguous run reaches either ROI edge the mask has bled
180
+ # out of the ROI laterally — almost always via the webbing into the
181
+ # adjacent finger. The column we stopped at is the ROI edge, not
182
+ # the true finger boundary, so the row is unreliable.
183
+ if mask_only and (left_mask_boundary == 0 or right_mask_boundary == max_col):
184
+ return None
 
 
 
 
 
 
185
 
186
+ SEARCH_RANGE = 10 # px on either side of mask boundary to refine
187
+
188
+ def _snap_to_peak(
189
+ boundary: int,
190
+ gradient: np.ndarray,
191
+ ) -> Tuple[float, float]:
192
+ """Return (edge_x, strength). Snap to local gradient peak if
193
+ it exceeds threshold, otherwise fall back to boundary itself."""
194
+ lo = max(0, boundary - SEARCH_RANGE)
195
+ hi = min(len(gradient) - 1, boundary + SEARCH_RANGE)
196
+ if hi < lo:
197
+ return float(boundary), 0.0
198
+ window = gradient[lo:hi + 1]
199
+ best_rel = int(np.argmax(window))
200
+ best_val = float(window[best_rel])
201
+ if best_val > threshold:
202
+ return float(lo + best_rel), best_val
203
+ # Weak gradient: trust the SAM mask boundary directly.
204
+ return float(boundary), float(gradient[boundary])
205
+
206
+ if mask_only:
207
+ # Trust the SAM mask boundary exactly; no gradient snapping.
208
+ left_edge_x = float(left_mask_boundary)
209
+ right_edge_x = float(right_mask_boundary)
210
+ left_strength = float(left_search_gradient[left_mask_boundary])
211
+ right_strength = float(right_search_gradient[right_mask_boundary])
212
+ else:
213
+ left_edge_x, left_strength = _snap_to_peak(
214
+ left_mask_boundary, left_search_gradient
215
+ )
216
+ right_edge_x, right_strength = _snap_to_peak(
217
+ right_mask_boundary, right_search_gradient
218
+ )
219
 
220
  else:
221
  # AXIS-EXPANSION MODE (fallback when no mask)
 
259
  image: np.ndarray,
260
  axis_data: Dict[str, Any],
261
  zone_data: Dict[str, Any],
262
+ rotate_align: bool = False,
263
+ finger_mask: Optional[np.ndarray] = None,
264
+ finger_name: Optional[str] = None,
265
  ) -> Dict[str, Any]:
266
  """
267
  Extract ROI around ring zone.
 
295
  zone_length = zone_data["length"]
296
  center = zone_data["center_point"]
297
  direction = axis_data["direction"]
298
# Ring and pinky have their proximal phalanx set lower on the palm than
# index/middle, so an ROI sized for the latter reaches down into the
# MCP webbing — those rows bleed into the adjacent finger's mask. Use
# half the vertical span (0.25x zone length instead of 0.5x) for those
# fingers. half_height is half of the chosen span, so the ring/pinky
# value must be 0.125x; previously both branches used 0.25x, which made
# the special case a no-op.
if finger_name in ("ring", "pinky"):
    half_height = zone_length * 0.125  # 0.25x / 2
else:
    half_height = zone_length * 0.25  # 0.5x / 2
306
  half_width = zone_length * 0.6 # 1.5x / 2
307
 
308
  x_min = int(np.clip(center[0] - half_width, 0, w - 1))
 
322
  # Convert to grayscale for edge detection
323
  roi_gray = cv2.cvtColor(roi_bgr, cv2.COLOR_BGR2GRAY)
324
 
325
+ # Build the per-row search constraint. Prefer a pixel-accurate finger mask
326
+ # when one is supplied (e.g. SAM 2.1 output). Falling back to a full-rect
327
+ # all-ones mask preserves legacy behaviour.
328
+ if finger_mask is not None:
329
+ fm = finger_mask
330
+ if fm.dtype != np.uint8:
331
+ fm = (fm > 0).astype(np.uint8) * 255
332
+ if fm.shape[:2] != image.shape[:2]:
333
+ fm = cv2.resize(
334
+ fm, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_NEAREST
335
+ )
336
+ roi_mask = fm[y_min:y_max, x_min:x_max].copy()
337
+ if not np.any(roi_mask):
338
+ roi_mask = np.ones((roi_height, roi_width), dtype=np.uint8) * 255
339
+ else:
340
+ roi_mask = np.ones((roi_height, roi_width), dtype=np.uint8) * 255
341
 
342
  # Create transform matrix (ROI coords -> original coords)
343
  # Simple translation for non-rotated case
 
515
  roi_data: Dict[str, Any],
516
  threshold: float = DEFAULT_GRADIENT_THRESHOLD,
517
  expected_width_px: Optional[float] = None,
518
+ scale_px_per_cm: Optional[float] = None,
519
+ mask_only: bool = False,
520
  ) -> Dict[str, Any]:
521
  """
522
  Detect left and right finger edges for each row (cross-section).
 
578
 
579
  # Get finger mask for constrained edge detection (if available)
580
  roi_mask = roi_data.get("roi_mask")
581
+ if mask_only and roi_mask is not None:
582
+ mode_used = "mask_only"
583
+ elif roi_mask is not None:
584
+ mode_used = "mask_constrained"
585
+ else:
586
+ mode_used = "axis_expansion"
587
 
588
  if roi_mask is not None:
589
  logger.debug(f"Using MASK-CONSTRAINED edge detection (mask shape: {roi_mask.shape})")
 
616
  result = _find_edges_from_axis(row_gradient, row, axis_x, threshold,
617
  min_width_px, max_width_px, row_mask,
618
  row_gradient_left_to_right=row_gradient_l2r,
619
+ row_gradient_right_to_left=row_gradient_r2l,
620
+ mask_only=mask_only)
621
 
622
  if result is None:
623
  continue # No valid edges found
 
1007
  """
1008
  Decide whether to use Sobel measurement or fall back to contour.
1009
 
1010
+ When the edge detection ran in ``mask_constrained`` mode (a pixel-accurate
1011
+ finger mask was supplied, e.g. from SAM 2.1), the SAM mask boundary IS
1012
+ the ground truth — a low gradient score just reflects weak finger/background
1013
+ contrast, not a bad measurement. In that case we skip the quality/gradient
1014
+ gates entirely and only enforce the safety checks (plausible width,
1015
+ minimum sample count); agreement with the contour baseline is not checked.
1016
+
1017
+ Decision criteria (non-masked path):
1018
  1. Edge quality score > min_quality_score (default 0.7)
1019
  2. Edge consistency > min_consistency (default 0.5 = 50%)
1020
+ 3. Realistic width range
1021
+ 4. If contour available: agreement within max_difference_pct
1022
+
1023
+ Decision criteria (mask_constrained path):
1024
+ 1. Non-empty sample set
1025
+ 2. Realistic width range
1026
 
1027
  Args:
1028
  sobel_result: Output from refine_edges_sobel()
1029
  contour_result: Optional output from compute_cross_section_width()
1030
+ min_quality_score: Minimum acceptable quality score (ignored for masked)
1031
+ min_consistency: Minimum edge detection success rate (ignored for masked)
1032
  max_difference_pct: Maximum allowed difference from contour (%)
1033
 
1034
  Returns:
1035
  Tuple of (should_use_sobel, reason)
1036
  """
 
1037
  if "edge_quality" not in sobel_result:
1038
  return False, "edge_quality_data_missing"
1039
 
1040
  edge_quality = sobel_result["edge_quality"]
1041
+ mode_used = sobel_result.get("edge_data", {}).get("mode_used", "axis_expansion")
1042
+ mask_anchored = mode_used in ("mask_constrained", "mask_only")
1043
 
 
 
 
 
 
 
 
 
 
1044
  sobel_width = sobel_result.get("median_width_cm")
1045
  if sobel_width is None or sobel_width <= 0:
1046
  return False, "invalid_measurement"
 
 
1047
  if sobel_width < MIN_REALISTIC_WIDTH_CM or sobel_width > MAX_REALISTIC_WIDTH_CM:
1048
  return False, f"unrealistic_width_{sobel_width:.2f}cm"
1049
 
1050
+ if mask_anchored:
1051
+ # SAM mask is the source of truth, but we still require enough
1052
+ # valid rows to form a robust median. A low sample count with an
1053
+ # otherwise reasonable width usually indicates the per-finger mask
1054
+ # bled into an adjacent finger and width validation killed most
1055
+ # rows — contour is safer in that situation.
1056
+ MIN_MASK_SAMPLES = 20 # parity with the contour path's 20 samples
1057
+ num_samples = int(sobel_result.get("num_samples", 0))
1058
+ if num_samples < MIN_MASK_SAMPLES:
1059
+ return False, f"mask_samples_low_{num_samples}"
1060
+ return True, "mask_anchored"
1061
+
1062
+ # Non-masked path: preserve the original gradient-quality gates.
1063
+ if edge_quality["overall_score"] < min_quality_score:
1064
+ return False, f"quality_score_low_{edge_quality['overall_score']:.2f}"
1065
+
1066
+ if edge_quality["consistency_score"] < min_consistency:
1067
+ return False, f"consistency_low_{edge_quality['consistency_score']:.2f}"
1068
+
1069
  if contour_result is not None:
1070
  contour_width = contour_result.get("median_width_px")
1071
  sobel_width_px = sobel_result.get("median_width_px")
 
1072
  if contour_width and sobel_width_px:
1073
  diff_pct = abs(sobel_width_px - contour_width) / contour_width * 100
 
1074
  if diff_pct > max_difference_pct:
1075
  return False, f"disagrees_with_contour_{diff_pct:.1f}pct"
1076
 
 
1077
  return True, "quality_acceptable"
1078
 
1079
 
 
1089
  use_subpixel: bool = True,
1090
  expected_width_px: Optional[float] = None,
1091
  debug_dir: Optional[str] = None,
1092
+ finger_mask: Optional[np.ndarray] = None,
1093
+ mask_mode: str = "hybrid",
1094
+ finger_name: Optional[str] = None,
1095
  ) -> Dict[str, Any]:
1096
  """
1097
  Main entry point for Sobel-based edge refinement.
 
1148
  # A.2: Ring zone + ROI bounds (need to extract bounds first)
1149
  # We'll save this after ROI extraction
1150
 
1151
+ # Step 1: Extract ROI. Pure-Sobel mode drops the SAM mask so the gradient
1152
+ # search expands from the finger axis without any mask anchoring; the
1153
+ # "mask_only" and legacy "hybrid" modes both pass the mask through.
1154
+ roi_finger_mask = None if mask_mode == "sobel_only" else finger_mask
1155
  roi_data = extract_ring_zone_roi(
1156
  image, axis_data, zone_data,
1157
+ rotate_align=rotate_align,
1158
+ finger_mask=roi_finger_mask,
1159
+ finger_name=finger_name,
1160
  )
1161
 
1162
  logger.debug(f"ROI size: {roi_data['roi_width']}x{roi_data['roi_height']}px")
 
1191
  grad_mag = draw_gradient_visualization(gradient_data["gradient_magnitude"], cv2.COLORMAP_HOT)
1192
  observer.save_stage("06_gradient_magnitude", grad_mag)
1193
 
1194
+ # Step 3: Detect edges per row. In "mask_only" mode the mask boundary is
1195
+ # used verbatim so gradient snapping is disabled.
1196
  edge_data = detect_edges_per_row(
1197
  gradient_data, roi_data,
1198
  threshold=sobel_threshold,
1199
  expected_width_px=expected_width_px,
1200
+ scale_px_per_cm=scale_px_per_cm,
1201
+ mask_only=(mask_mode == "mask_only"),
1202
  )
1203
 
1204
  logger.debug(f"Valid rows: {edge_data['num_valid_rows']}/{len(edge_data['valid_rows'])} ({edge_data['num_valid_rows']/len(edge_data['valid_rows'])*100:.1f}%)")
 
1227
  # B.5: Selected edges (final detected edges)
1228
  observer.draw_and_save("09_selected_edges", roi_data["roi_image"], draw_selected_edges, edge_data)
1229
 
1230
+ # Step 4: Measure width from edges (with sub-pixel refinement).
1231
+ # Sub-pixel refinement is gradient-based, so it is skipped in mask_only.
1232
+ effective_subpixel = use_subpixel and mask_mode != "mask_only"
1233
  width_data = measure_width_from_edges(
1234
  edge_data, roi_data, scale_px_per_cm,
1235
  gradient_data=gradient_data,
1236
+ use_subpixel=effective_subpixel,
1237
  )
1238
 
1239
  if debug_dir:
src/finger_segmentation.py CHANGED
@@ -278,6 +278,7 @@ def segment_hand(
278
  finger: FingerIndex = "index",
279
  max_dimension: int = 1280,
280
  debug_dir: Optional[str] = None,
 
281
  ) -> Optional[Dict[str, Any]]:
282
  """
283
  Detect and segment hand from image using MediaPipe.
@@ -292,10 +293,17 @@ def segment_hand(
292
  Dictionary containing:
293
  - landmarks: 21x2 array of landmark positions (pixel coordinates)
294
  - landmarks_normalized: 21x2 array of normalized coordinates [0-1]
295
- - mask: Binary hand mask
 
 
296
  - confidence: Detection confidence
297
  - handedness: "Left" or "Right"
298
  Or None if no hand detected
 
 
 
 
 
299
  """
300
  # Create debug observer if debug mode enabled
301
  observer = DebugObserver(debug_dir) if debug_dir else None
@@ -427,13 +435,34 @@ def segment_hand(
427
  handedness[0].category_name,
428
  f"det={rotation_code}, orient={orientation_rotation}")
429
 
430
- # Generate hand mask at canonical resolution
431
- mask = _create_hand_mask(landmarks_canonical, (can_full_h, can_full_w))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
432
 
433
  return {
434
  "landmarks": landmarks_canonical,
435
  "landmarks_normalized": landmarks_normalized_canonical,
436
  "mask": mask,
 
 
437
  "confidence": handedness[0].score,
438
  "handedness": handedness[0].category_name,
439
  "rotation_applied": rotation_code,
 
278
  finger: FingerIndex = "index",
279
  max_dimension: int = 1280,
280
  debug_dir: Optional[str] = None,
281
+ use_sam_mask: bool = True,
282
  ) -> Optional[Dict[str, Any]]:
283
  """
284
  Detect and segment hand from image using MediaPipe.
 
293
  Dictionary containing:
294
  - landmarks: 21x2 array of landmark positions (pixel coordinates)
295
  - landmarks_normalized: 21x2 array of normalized coordinates [0-1]
296
+ - mask: Binary hand mask (pixel-accurate SAM 2.1 mask if use_sam_mask=True,
297
+ else synthetic convex-hull fallback from landmarks)
298
+ - mask_synthetic: Synthetic fallback mask (always populated for debug)
299
  - confidence: Detection confidence
300
  - handedness: "Left" or "Right"
301
  Or None if no hand detected
302
+
303
+ Args:
304
+ use_sam_mask: If True (default), call SAM 2.1 seeded by the palm-center
305
+ landmark to produce a pixel-accurate hand mask. If False, use the
306
+ legacy synthetic convex-hull mask built from landmarks only.
307
  """
308
  # Create debug observer if debug mode enabled
309
  observer = DebugObserver(debug_dir) if debug_dir else None
 
435
  handedness[0].category_name,
436
  f"det={rotation_code}, orient={orientation_rotation}")
437
 
438
+ # Legacy synthetic mask (convex hull of landmarks) — kept for fallback and debug
439
+ synthetic_mask = _create_hand_mask(landmarks_canonical, (can_full_h, can_full_w))
440
+
441
+ # SAM 2.1 pixel-accurate hand mask (default), seeded by palm center landmark
442
+ mask = synthetic_mask
443
+ mask_source = "synthetic"
444
+ if use_sam_mask:
445
+ try:
446
+ from .sam_hand_segmentation import segment_hand_sam, palm_center_from_landmarks
447
+ palm_xy = palm_center_from_landmarks(landmarks_canonical)
448
+ sam_debug_dir = str(Path(debug_dir) / "sam_hand") if debug_dir else None
449
+ sam_mask_bool = segment_hand_sam(
450
+ canonical_full, palm_xy, debug_dir=sam_debug_dir
451
+ )
452
+ if sam_mask_bool is not None:
453
+ mask = (sam_mask_bool.astype(np.uint8) * 255)
454
+ mask_source = "sam"
455
+ else:
456
+ print(" SAM hand mask returned None, falling back to synthetic")
457
+ except Exception as e:
458
+ print(f" SAM hand mask failed ({e!r}), falling back to synthetic")
459
 
460
  return {
461
  "landmarks": landmarks_canonical,
462
  "landmarks_normalized": landmarks_normalized_canonical,
463
  "mask": mask,
464
+ "mask_synthetic": synthetic_mask,
465
+ "mask_source": mask_source,
466
  "confidence": handedness[0].score,
467
  "handedness": handedness[0].category_name,
468
  "rotation_applied": rotation_code,
src/geometry.py CHANGED
@@ -92,22 +92,28 @@ def estimate_finger_axis_from_landmarks(
92
  """
93
  Calculate finger axis directly from anatomical landmarks.
94
 
95
- OPTIMIZED: Focuses on DIP-PIP segment (ring-wearing zone) for better accuracy.
 
 
 
 
 
 
96
 
97
  Args:
98
  landmarks: 4x2 array of finger landmarks [MCP, PIP, DIP, TIP]
99
  method: Calculation method
100
  - "endpoints": MCP to TIP vector (legacy, less accurate)
101
- - "linear_fit": DIP to PIP vector (DEFAULT, optimized for ring measurements)
102
  - "median_direction": Median of 3 segment directions (robust to outliers)
103
 
104
  Returns:
105
  Dictionary containing:
106
- - center: Axis center point at midpoint of PIP-DIP (x, y)
107
- - direction: Unit direction vector (dx, dy) from PIP to DIP
108
  - length: Full finger length in pixels (TIP to MCP, for reference)
109
- - palm_end: Visualization endpoint (extended from PIP toward palm)
110
- - tip_end: Visualization endpoint (extended from DIP toward tip)
111
  - method: Method used ("landmarks")
112
  """
113
  # Validate landmarks
@@ -122,7 +128,7 @@ def estimate_finger_axis_from_landmarks(
122
  tip = landmarks[3] # Fingertip
123
 
124
  # Calculate direction based on method
125
- # OPTIMIZED: Focus on DIP-PIP segment (ring-wearing zone)
126
  if method == "endpoints":
127
  # Simple: vector from MCP to TIP (legacy, less accurate for ring zone)
128
  direction = tip - mcp
@@ -130,14 +136,16 @@ def estimate_finger_axis_from_landmarks(
130
  direction = direction / direction_length
131
 
132
  elif method == "linear_fit":
133
- # OPTIMIZED: Use only DIP and PIP (most relevant for ring measurements)
134
- # These two joints define the proximal phalanx where rings are worn
135
- direction = dip - pip # Vector from PIP to DIP
 
 
136
  direction_length = np.linalg.norm(direction)
137
  direction = direction / direction_length
138
 
139
- # Ensure direction points from palm to tip (PIP to DIP)
140
- # Direction should already be correct, but verify
141
  if np.dot(direction, tip - mcp) < 0:
142
  direction = -direction
143
 
@@ -156,18 +164,18 @@ def estimate_finger_axis_from_landmarks(
156
  else:
157
  raise ValueError(f"Unknown method: {method}. Use 'endpoints', 'linear_fit', or 'median_direction'")
158
 
159
- # OPTIMIZED: Center at midpoint of DIP and PIP (ring zone focus)
160
- center = (pip + dip) / 2.0
161
 
162
  # Calculate finger length (still use full finger for reference)
163
  length = np.linalg.norm(tip - mcp)
164
 
165
- # OPTIMIZED: Visual endpoints are DIP and PIP (ring zone segment)
166
- # Extended slightly for visualization clarity
167
- segment_length = np.linalg.norm(dip - pip)
168
  extension_factor = 0.5 # Extend 50% beyond each endpoint for visualization
169
- palm_end = pip - direction * (segment_length * extension_factor)
170
- tip_end = dip + direction * (segment_length * extension_factor)
171
 
172
  return {
173
  "center": center.astype(np.float32),
 
92
  """
93
  Calculate finger axis directly from anatomical landmarks.
94
 
95
+ OPTIMIZED: Focuses on the PIP-MCP segment (proximal phalanx, where the
96
+ ring actually sits) for better accuracy. For straight fingers (index,
97
+ middle) this agrees with the DIP-PIP direction to within ~1°, but ring
98
+ and pinky often hold a visible PIP-joint curl, so the proximal phalanx
99
+ is at a different angle from the middle phalanx. Rotating by the
100
+ proximal-phalanx direction makes the ring zone exactly vertical and
101
+ cross-sections perpendicular to the bone we measure.
102
 
103
  Args:
104
  landmarks: 4x2 array of finger landmarks [MCP, PIP, DIP, TIP]
105
  method: Calculation method
106
  - "endpoints": MCP to TIP vector (legacy, less accurate)
107
+ - "linear_fit": MCP to PIP vector (DEFAULT, proximal phalanx)
108
  - "median_direction": Median of 3 segment directions (robust to outliers)
109
 
110
  Returns:
111
  Dictionary containing:
112
+ - center: Axis center point at midpoint of MCP-PIP (x, y)
113
+ - direction: Unit direction vector (dx, dy) pointing palm→tip
114
  - length: Full finger length in pixels (TIP to MCP, for reference)
115
+ - palm_end: Visualization endpoint (extended from MCP toward palm)
116
+ - tip_end: Visualization endpoint (extended from PIP toward tip)
117
  - method: Method used ("landmarks")
118
  """
119
  # Validate landmarks
 
128
  tip = landmarks[3] # Fingertip
129
 
130
  # Calculate direction based on method
131
+ # OPTIMIZED: Focus on the PIP-MCP segment (proximal phalanx = ring zone)
132
  if method == "endpoints":
133
  # Simple: vector from MCP to TIP (legacy, less accurate for ring zone)
134
  direction = tip - mcp
 
136
  direction = direction / direction_length
137
 
138
  elif method == "linear_fit":
139
+ # OPTIMIZED: Use MCP→PIP, the proximal phalanx bone that a ring
140
+ # actually rests on. For ring and pinky this differs from the old
141
+ # DIP-PIP direction by the PIP-joint curl angle, which was
142
+ # silently tilting the measurement frame.
143
+ direction = pip - mcp # Vector from MCP to PIP (palm→tip)
144
  direction_length = np.linalg.norm(direction)
145
  direction = direction / direction_length
146
 
147
+ # Sanity check: direction should point palm→tip. (MCP→PIP already
148
+ # does, but verify in case landmarks are swapped.)
149
  if np.dot(direction, tip - mcp) < 0:
150
  direction = -direction
151
 
 
164
  else:
165
  raise ValueError(f"Unknown method: {method}. Use 'endpoints', 'linear_fit', or 'median_direction'")
166
 
167
+ # OPTIMIZED: Center on the proximal phalanx midpoint (the ring zone).
168
+ center = (mcp + pip) / 2.0
169
 
170
  # Calculate finger length (still use full finger for reference)
171
  length = np.linalg.norm(tip - mcp)
172
 
173
+ # OPTIMIZED: Visual endpoints span the proximal phalanx (MCP→PIP)
174
+ # extended slightly for visualization clarity.
175
+ segment_length = np.linalg.norm(pip - mcp)
176
  extension_factor = 0.5 # Extend 50% beyond each endpoint for visualization
177
+ palm_end = mcp - direction * (segment_length * extension_factor)
178
+ tip_end = pip + direction * (segment_length * extension_factor)
179
 
180
  return {
181
  "center": center.astype(np.float32),
src/sam_backend.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Shared lazy singleton for SAM 2.1 Tiny (model + processor).
2
+
3
+ Both card detection (prompt-based) and hand segmentation use the same
4
+ HuggingFace weights, so loading them once per process halves cold-start
5
+ cost and keeps only one copy of the encoder in memory.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ import time
12
+ from typing import Tuple
13
+
14
+ # Bump the default HF Hub HEAD/download timeout (10s) before transformers
15
+ # reads the env var. On flaky networks the 10s HEAD check fires a retry storm
16
+ # even when the weights are already cached locally.
17
+ os.environ.setdefault("HF_HUB_DOWNLOAD_TIMEOUT", "60")
18
+
19
+ SAM2_MODEL_ID = "facebook/sam2.1-hiera-large"
20
+
21
+ # SAM resizes internally to 1024 — feeding >1024 wastes CPU on image encoding.
22
+ INFERENCE_MAX_SIDE = 1024
23
+
24
+ _model = None
25
+ _processor = None
26
+
27
+
28
def get_sam2() -> Tuple[object, object]:
    """Return the shared ``(model, processor)`` pair, loading it on first use.

    The pair is cached in the module-level ``_model`` / ``_processor``
    globals, so later calls return the same objects without reloading.

    Loading is attempted with ``local_files_only=True`` first; if that raises
    ``OSError`` or ``ValueError`` the load is repeated without the flag (a
    normal online load). The model is moved to CPU and put in eval mode.

    Returns:
        Tuple of (model, processor).
    """
    global _model, _processor

    # Fast path: both singletons already populated.
    if _model is not None and _processor is not None:
        return _model, _processor

    # Imported lazily so importing this module does not pull in transformers.
    from transformers import Sam2Model, Sam2Processor

    started = time.time()
    print(f" Loading SAM 2.1 ({SAM2_MODEL_ID})...")
    try:
        # Offline attempt: only look at files already present locally.
        _processor = Sam2Processor.from_pretrained(
            SAM2_MODEL_ID, local_files_only=True
        )
        cached_model = Sam2Model.from_pretrained(
            SAM2_MODEL_ID, local_files_only=True
        )
        _model = cached_model.to("cpu").eval()
        print(f" SAM 2.1 loaded (offline cache) in {time.time() - started:.1f}s")
    except (OSError, ValueError):
        # Cache miss — fall back to online download.
        _processor = Sam2Processor.from_pretrained(SAM2_MODEL_ID)
        downloaded_model = Sam2Model.from_pretrained(SAM2_MODEL_ID)
        _model = downloaded_model.to("cpu").eval()
        print(f" SAM 2.1 loaded (online) in {time.time() - started:.1f}s")

    return _model, _processor
src/sam_card_detection.py ADDED
@@ -0,0 +1,614 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SAM 2.1-based credit card detection.
3
+
4
+ Uses Meta's Segment Anything 2.1 (Hiera Tiny) via HuggingFace transformers
5
+ to produce a pixel-accurate card mask, then filters candidate masks by area,
6
+ rectangularity, and aspect ratio (~1.586) to pick the credit card.
7
+
8
+ Drop-in replacement for `card_detection.detect_credit_card`: returns a dict
9
+ with the same keys so the downstream pipeline is unchanged.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import os
15
+ import time
16
+ from pathlib import Path
17
+ from typing import Any, Dict, List, Optional, Tuple
18
+
19
+ import cv2
20
+ import numpy as np
21
+
22
+ from .card_detection import (
23
+ CARD_ASPECT_RATIO,
24
+ MAX_CARD_AREA_RATIO,
25
+ MIN_CARD_AREA_RATIO,
26
+ get_quad_dimensions,
27
+ order_corners,
28
+ )
29
+ from .sam_backend import INFERENCE_MAX_SIDE as PROMPT_INFERENCE_MAX_SIDE, get_sam2
30
+
31
+ # HF Hub model id. NOTE(review): this is the Hiera *Large* checkpoint, not the small/Tiny variant the docs describe — confirm which is intended for CPU / HF free Spaces.
32
+ SAM2_MODEL_ID = "facebook/sam2.1-hiera-large"
33
+
34
+ # Downscale large images before inference to keep CPU runtime tractable.
35
+ # SAM 2.1 internally resizes to 1024, so feeding >1024 is pure overhead.
36
+ INFERENCE_MAX_SIDE = 1024
37
+
38
+ # Automatic mask generation grid density. 16 gives ~256 prompts — enough to
39
+ # hit a credit card reliably without blowing up CPU time.
40
+ POINTS_PER_SIDE = 16
41
+ POINTS_PER_BATCH = 64
42
+
43
+ # Candidate filtering
44
+ MIN_RECTANGULARITY = 0.90 # mask_area / minAreaRect_area; card mask is near-perfect rectangle
45
+ ASPECT_RATIO_TOLERANCE = 0.15 # fractional deviation from 1.586
46
+
47
+ _pipeline = None # lazy singleton
48
+
49
+
50
def _get_pipeline():
    """Return the SAM 2.1 mask-generation pipeline, creating it on first use.

    The pipeline is cached in the module-level ``_pipeline`` global. Creation
    first passes ``local_files_only=True`` through ``model_kwargs``; if that
    raises ``OSError`` or ``ValueError`` the pipeline is built again without
    the flag (a normal online load). The pipeline always runs on CPU.
    """
    global _pipeline

    # Already built: reuse the singleton.
    if _pipeline is not None:
        return _pipeline

    # Raise the hub timeout (unless the user already set one) before
    # transformers is imported.
    os.environ.setdefault("HF_HUB_DOWNLOAD_TIMEOUT", "60")
    from transformers import pipeline as hf_pipeline

    started = time.time()
    print(f" Loading SAM 2.1 ({SAM2_MODEL_ID})...")

    shared_kwargs = {"model": SAM2_MODEL_ID, "device": "cpu"}
    try:
        # Offline attempt: only use files already present locally.
        _pipeline = hf_pipeline(
            "mask-generation",
            model_kwargs={"local_files_only": True},
            **shared_kwargs,
        )
        print(f" SAM 2.1 loaded (offline cache) in {time.time() - started:.1f}s")
    except (OSError, ValueError):
        # Offline load failed; retry allowing a download.
        _pipeline = hf_pipeline("mask-generation", **shared_kwargs)
        print(f" SAM 2.1 loaded (online) in {time.time() - started:.1f}s")

    return _pipeline
74
+
75
+
76
def _downscale_for_inference(image_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
    """Shrink an image so its longer side equals ``INFERENCE_MAX_SIDE``.

    Images whose longer side is already within the limit are returned
    untouched (same object) with a factor of 1.0.

    Args:
        image_bgr: Image array; only its first two dimensions (height, width)
            are inspected here.

    Returns:
        Tuple of (image, scale_factor) where scale_factor is original/scaled:
        multiply coordinates in the scaled image by it to get coordinates in
        the original image.
    """
    height, width = image_bgr.shape[:2]
    longest = max(height, width)

    # Nothing to do when the image already fits.
    if longest <= INFERENCE_MAX_SIDE:
        return image_bgr, 1.0

    shrink = INFERENCE_MAX_SIDE / longest
    # cv2.resize takes the target size as (width, height).
    target_size = (int(round(width * shrink)), int(round(height * shrink)))
    resized = cv2.resize(image_bgr, target_size, interpolation=cv2.INTER_AREA)

    # Invert the shrink ratio so callers can map scaled -> original coords.
    return resized, 1.0 / shrink
91
+
92
+
93
+ def _mask_to_bool_array(mask: Any, target_shape: Optional[Tuple[int, int]] = None) -> np.ndarray:
94
+ """Convert SAM output mask (torch tensor, PIL, or ndarray) to a bool ndarray.
95
+
96
+ If target_shape is given and differs, resize with nearest neighbor.
97
+ """
98
+ if hasattr(mask, "cpu"):
99
+ arr = mask.cpu().numpy()
100
+ else:
101
+ arr = np.asarray(mask)
102
+ if arr.dtype != bool:
103
+ arr = arr > 0
104
+ if target_shape is not None and arr.shape != target_shape:
105
+ arr_u8 = arr.astype(np.uint8) * 255
106
+ resized = cv2.resize(
107
+ arr_u8, (target_shape[1], target_shape[0]), interpolation=cv2.INTER_NEAREST
108
+ )
109
+ arr = resized > 127
110
+ return arr
111
+
112
+
113
+ def _score_card_mask(
114
+ mask: np.ndarray,
115
+ image_area: float,
116
+ ) -> Optional[Dict[str, Any]]:
117
+ """Score a candidate mask for being a credit card.
118
+
119
+ Returns a dict with {corners, width, height, area, aspect_ratio, rectangularity, score}
120
+ or None if the mask is rejected.
121
+ """
122
+ mask_u8 = mask.astype(np.uint8) * 255
123
+ mask_area = float(mask.sum())
124
+
125
+ area_ratio = mask_area / image_area
126
+ if area_ratio < MIN_CARD_AREA_RATIO or area_ratio > MAX_CARD_AREA_RATIO:
127
+ return None
128
+
129
+ contours, _ = cv2.findContours(mask_u8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
130
+ if not contours:
131
+ return None
132
+
133
+ # Largest external contour is the card body (SAM masks can be slightly disconnected)
134
+ contour = max(contours, key=cv2.contourArea)
135
+ contour_area = cv2.contourArea(contour)
136
+ if contour_area <= 0:
137
+ return None
138
+
139
+ rect = cv2.minAreaRect(contour)
140
+ box = cv2.boxPoints(rect)
141
+ rect_area = cv2.contourArea(box.astype(np.float32))
142
+ if rect_area <= 0:
143
+ return None
144
+
145
+ rectangularity = contour_area / rect_area
146
+ if rectangularity < MIN_RECTANGULARITY:
147
+ return None
148
+
149
+ corners = order_corners(box)
150
+ width, height = get_quad_dimensions(corners)
151
+ if width <= 0 or height <= 0:
152
+ return None
153
+
154
+ aspect_ratio = max(width, height) / min(width, height)
155
+ ratio_diff = abs(aspect_ratio - CARD_ASPECT_RATIO) / CARD_ASPECT_RATIO
156
+ if ratio_diff > ASPECT_RATIO_TOLERANCE:
157
+ return None
158
+
159
+ # Higher score: better rectangularity + tighter aspect ratio match + meaningful size
160
+ ratio_score = 1.0 - ratio_diff / ASPECT_RATIO_TOLERANCE
161
+ rect_score = (rectangularity - MIN_RECTANGULARITY) / (1.0 - MIN_RECTANGULARITY)
162
+ area_score = min(area_ratio / 0.1, 1.0) # caps at 10% of image area
163
+ score = 0.4 * ratio_score + 0.4 * rect_score + 0.2 * area_score
164
+
165
+ return {
166
+ "corners": corners,
167
+ "contour": contour,
168
+ "width": width,
169
+ "height": height,
170
+ "area": mask_area,
171
+ "aspect_ratio": aspect_ratio,
172
+ "rectangularity": rectangularity,
173
+ "score": score,
174
+ "mask": mask,
175
+ }
176
+
177
+
178
def _save_debug(
    debug_dir: str,
    image_bgr: np.ndarray,
    all_masks: List[np.ndarray],
    scored: List[Dict[str, Any]],
    best: Optional[Dict[str, Any]],
) -> None:
    """Save debug visualizations for SAM card detection.

    Writes three PNGs into ``debug_dir`` (created if missing):
    ``01_all_sam_masks.png``, ``02_card_candidates.png`` and, when ``best`` is
    not None, ``03_final_selection.png``.

    Args:
        debug_dir: Output directory.
        image_bgr: Image the overlays are drawn on; masks and corners must be
            in this image's coordinate space.
        all_masks: Every boolean mask produced by SAM.
        scored: Candidates that passed scoring (dicts with ``corners``,
            ``score``, ``aspect_ratio``).
        best: The selected candidate, or None.
    """
    out_dir = Path(debug_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # 01: all SAM masks overlaid (colored)
    overlay = image_bgr.copy()
    rng = np.random.default_rng(42)
    for m in all_masks:
        # Skip missing/empty masks, matching the prompt-path debug helper.
        if m is None or not m.any():
            continue
        color = rng.integers(64, 255, size=3).tolist()
        overlay[m] = (0.5 * overlay[m] + 0.5 * np.array(color)).astype(np.uint8)
    cv2.imwrite(str(out_dir / "01_all_sam_masks.png"), overlay)

    # 02: scored card candidates (green contours, score labels)
    cand_img = image_bgr.copy()
    for s in scored:
        corners = s["corners"].astype(np.int32)
        cv2.polylines(cand_img, [corners], True, (0, 255, 0), 3)
        # Native Python ints: some OpenCV builds refuse NumPy scalars in points.
        anchor = (int(corners[0][0]), int(corners[0][1]))
        cv2.putText(
            cand_img,
            f"{s['score']:.2f} ar={s['aspect_ratio']:.3f}",
            anchor,
            cv2.FONT_HERSHEY_SIMPLEX,
            1.2,
            (0, 255, 0),
            3,
            cv2.LINE_AA,
        )
    cv2.imwrite(str(out_dir / "02_card_candidates.png"), cand_img)

    # 03: final selection
    if best is None:
        return

    final = image_bgr.copy()
    tint = np.zeros_like(final)
    tint[:, :, 1] = best["mask"].astype(np.uint8) * 255  # green channel
    final = cv2.addWeighted(final, 1.0, tint, 0.35, 0)

    corners = best["corners"].astype(np.int32)
    cv2.polylines(final, [corners], True, (0, 255, 0), 4)
    for x, y in corners.tolist():
        cv2.circle(final, (x, y), 10, (0, 0, 255), -1)

    label = (
        f"SAM card score={best['score']:.3f} "
        f"ar={best['aspect_ratio']:.3f} rect={best['rectangularity']:.3f}"
    )
    # White outline first, then green text on top for legibility.
    cv2.putText(final, label, (30, 60), cv2.FONT_HERSHEY_SIMPLEX, 1.1,
                (255, 255, 255), 5, cv2.LINE_AA)
    cv2.putText(final, label, (30, 60), cv2.FONT_HERSHEY_SIMPLEX, 1.1,
                (0, 255, 0), 2, cv2.LINE_AA)
    cv2.imwrite(str(out_dir / "03_final_selection.png"), final)
235
+
236
+
237
def detect_credit_card_sam(
    image: np.ndarray,
    debug_dir: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """Detect a credit card in the image using SAM 2.1.

    Runs the mask-generation pipeline on a downscaled copy of the image,
    resizes every mask back to the input resolution, scores each one with
    ``_score_card_mask`` and returns the best-scoring candidate.

    Args:
        image: Input BGR image (any size)
        debug_dir: Optional directory to save debug visualizations

    Returns:
        Dict with keys matching card_detection.detect_credit_card:
        {corners, contour, confidence, width_px, height_px, aspect_ratio}
        plus ``mask`` (bool mask at input resolution) and ``mask_source``
        ("sam_amg"), or None if the image is empty or no card-like mask
        was found. ``contour`` carries the same quad as ``corners``.
    """
    # Guard first: an empty image has zero area and nothing to segment.
    if image is None or image.size == 0:
        print(" SAM: empty image, skipping card detection")
        return None

    from PIL import Image as PILImage

    if debug_dir:
        print(f" SAM card detection debug → {debug_dir}")

    h, w = image.shape[:2]
    image_area = float(h * w)

    # The scale factor is not needed here: masks are resized straight to (h, w).
    scaled_bgr, _ = _downscale_for_inference(image)
    pil = PILImage.fromarray(cv2.cvtColor(scaled_bgr, cv2.COLOR_BGR2RGB))

    pipe = _get_pipeline()
    t0 = time.time()
    output = pipe(
        pil,
        points_per_side=POINTS_PER_SIDE,
        points_per_batch=POINTS_PER_BATCH,
    )
    print(f" SAM inference: {time.time() - t0:.1f}s → {len(output['masks'])} masks")

    # Upscale masks back to original resolution once, keep them for scoring + debug
    all_masks_full: List[np.ndarray] = [
        _mask_to_bool_array(m, target_shape=(h, w)) for m in output["masks"]
    ]

    scored: List[Dict[str, Any]] = []
    for m in all_masks_full:
        result = _score_card_mask(m, image_area)
        if result is not None:
            scored.append(result)

    scored.sort(key=lambda d: d["score"], reverse=True)
    best = scored[0] if scored else None

    if debug_dir:
        _save_debug(debug_dir, image, all_masks_full, scored, best)

    if best is None:
        print(" SAM: no card-like mask found")
        return None

    print(
        f" SAM card: score={best['score']:.3f}, aspect={best['aspect_ratio']:.3f}, "
        f"rect={best['rectangularity']:.3f}, {best['width']:.0f}x{best['height']:.0f}px"
    )

    return {
        "corners": best["corners"],
        "contour": best["corners"],
        "confidence": float(best["score"]),
        "width_px": float(best["width"]),
        "height_px": float(best["height"]),
        "aspect_ratio": float(best["aspect_ratio"]),
        "mask": best["mask"],
        "mask_source": "sam_amg",
    }
309
+
310
+
311
+ # ---------------------------------------------------------------------------
312
+ # Prompt-based card detection (fast path)
313
+ # ---------------------------------------------------------------------------
314
+
315
+
316
def suggest_card_seeds(
    hand_mask: np.ndarray,
    image_shape: Tuple[int, int],
    max_seeds: int = 25,
) -> List[Tuple[int, int]]:
    """Uniform 5x5 grid of seed points, with hand-mask points dropped.

    Lay out a 5x5 lattice over the inner core of the image (outer 20% margin
    trimmed on each axis because cards never sit flush against the image
    edge), then discard any point that falls inside the hand mask. Dense
    enough that at least one point reliably lands inside the credit card
    regardless of where it sits relative to the hand.

    Args:
        hand_mask: bool or uint8 hand mask, same shape as the canonical image.
        image_shape: (h, w) of the canonical image; extra trailing entries
            (e.g. a channel count) are ignored.
        max_seeds: hard cap on returned seeds (default 25 = full 5x5 grid).
            Values <= 0 yield an empty list.

    Returns:
        List of (x, y) pixel coordinates in the canonical image frame, in
        row-major order (top row first, left to right).
    """
    h, w = image_shape[:2]
    # A non-positive cap or a degenerate image cannot yield any seed; the cap
    # was previously only checked after the first append.
    if max_seeds <= 0 or h <= 0 or w <= 0:
        return []

    mask_bool = np.asarray(hand_mask, dtype=bool)

    # 5x5 grid in [0.2, 0.8] × [0.2, 0.8] of the image, clamped into bounds.
    fracs = (0.20, 0.35, 0.50, 0.65, 0.80)
    xs_grid = [max(0, min(w - 1, int(round(w * f)))) for f in fracs]
    ys_grid = [max(0, min(h - 1, int(round(h * f)))) for f in fracs]

    seeds: List[Tuple[int, int]] = []
    for py in ys_grid:
        for px in xs_grid:
            if mask_bool[py, px]:
                continue
            seeds.append((px, py))
            if len(seeds) >= max_seeds:
                return seeds
    return seeds
356
+
357
+
358
+ def _downscale_prompt(image_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
359
+ """Downscale for prompt inference. Returns (scaled, scale_back)."""
360
+ h, w = image_bgr.shape[:2]
361
+ long_side = max(h, w)
362
+ if long_side <= PROMPT_INFERENCE_MAX_SIDE:
363
+ return image_bgr, 1.0
364
+ scale = PROMPT_INFERENCE_MAX_SIDE / long_side
365
+ new_w = int(round(w * scale))
366
+ new_h = int(round(h * scale))
367
+ scaled = cv2.resize(image_bgr, (new_w, new_h), interpolation=cv2.INTER_AREA)
368
+ return scaled, 1.0 / scale
369
+
370
+
371
def _save_prompt_debug(
    debug_dir: str,
    image_bgr: np.ndarray,
    seeds: List[Tuple[int, int]],
    negatives: List[Tuple[int, int]],
    candidate_masks: List[np.ndarray],
    scored: List[Dict[str, Any]],
    best: Optional[Dict[str, Any]],
) -> None:
    """Save debug visualizations for prompt-based card detection.

    Writes ``01_prompt_points.png``, ``02_candidate_masks.png``,
    ``03_scored.png`` and, when ``best`` is not None,
    ``04_final_selection.png`` into ``debug_dir`` (created if missing).

    All points, masks and corners must be expressed in the coordinate space
    of ``image_bgr``.

    Args:
        debug_dir: Output directory.
        image_bgr: Image the overlays are drawn on.
        seeds: Positive prompt points (drawn green).
        negatives: Negative prompt points (drawn red).
        candidate_masks: Every mask returned by SAM; None/empty ones are skipped.
        scored: Candidates that passed scoring.
        best: The selected candidate, or None.
    """
    out_dir = Path(debug_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # 01: prompt points on the image
    pts_img = image_bgr.copy()
    for points, fill in ((seeds, (0, 255, 0)), (negatives, (0, 0, 255))):
        for px, py in points:
            # Native ints: some OpenCV builds refuse NumPy scalars in points.
            center = (int(px), int(py))
            cv2.circle(pts_img, center, 20, fill, -1)
            cv2.circle(pts_img, center, 20, (0, 0, 0), 3)
    cv2.imwrite(str(out_dir / "01_prompt_points.png"), pts_img)

    # 02: all candidate masks overlaid (one color per prompt)
    overlay = image_bgr.copy()
    rng = np.random.default_rng(7)
    for m in candidate_masks:
        if m is None or m.sum() == 0:
            continue
        color = rng.integers(64, 255, size=3).tolist()
        overlay[m] = (0.5 * overlay[m] + 0.5 * np.array(color)).astype(np.uint8)
    cv2.imwrite(str(out_dir / "02_candidate_masks.png"), overlay)

    # 03: scored candidates
    cand_img = image_bgr.copy()
    for s in scored:
        corners = s["corners"].astype(np.int32)
        cv2.polylines(cand_img, [corners], True, (0, 255, 0), 3)
        anchor = (int(corners[0][0]), int(corners[0][1]))
        cv2.putText(
            cand_img,
            f"{s['score']:.2f} ar={s['aspect_ratio']:.3f}",
            anchor,
            cv2.FONT_HERSHEY_SIMPLEX,
            1.2,
            (0, 255, 0),
            3,
            cv2.LINE_AA,
        )
    cv2.imwrite(str(out_dir / "03_scored.png"), cand_img)

    # 04: final selection
    if best is None:
        return

    final = image_bgr.copy()
    tint = np.zeros_like(final)
    tint[:, :, 1] = best["mask"].astype(np.uint8) * 255
    final = cv2.addWeighted(final, 1.0, tint, 0.35, 0)
    corners = best["corners"].astype(np.int32)
    cv2.polylines(final, [corners], True, (0, 255, 0), 4)
    for x, y in corners.tolist():
        cv2.circle(final, (x, y), 10, (0, 0, 255), -1)
    label = (
        f"SAM-prompt card score={best['score']:.3f} "
        f"ar={best['aspect_ratio']:.3f} rect={best['rectangularity']:.3f}"
    )
    # White outline first, then green text on top for legibility.
    cv2.putText(final, label, (30, 60), cv2.FONT_HERSHEY_SIMPLEX, 1.1,
                (255, 255, 255), 5, cv2.LINE_AA)
    cv2.putText(final, label, (30, 60), cv2.FONT_HERSHEY_SIMPLEX, 1.1,
                (0, 255, 0), 2, cv2.LINE_AA)
    cv2.imwrite(str(out_dir / "04_final_selection.png"), final)
439
+
440
+
441
def detect_credit_card_sam_prompt(
    image: np.ndarray,
    seed_points: List[Tuple[int, int]],
    negative_points: Optional[List[Tuple[int, int]]] = None,
    debug_dir: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """Prompt-based SAM 2.1 credit card detection.

    For each seed point, runs a single-point SAM decoder pass with
    `multimask_output=True` and collects all returned masks. Every mask is
    then filtered through `_score_card_mask`; the highest-scoring survivor
    is returned. This is ~20× faster than the AMG path because it runs the
    decoder ~N times (one per seed) instead of 256 times on a dense grid.

    Args:
        image: Full-resolution BGR image (canonical orientation).
        seed_points: List of (x, y) positive-point candidates. Each one is
            tried independently. A few well-placed candidates are enough.
        negative_points: Optional list of (x, y) negative points applied to
            every seed's prompt (e.g., palm center to steer SAM off the hand).
        debug_dir: Optional directory to dump debug visualizations.

    Returns:
        Card dict matching `detect_credit_card`/`detect_credit_card_sam`
        ({corners, contour, confidence, width_px, height_px, aspect_ratio,
        mask, mask_source}), or None if there are no seeds, the image is
        empty, or no seed produced a valid card mask.
    """
    if not seed_points:
        print(" SAM-prompt: no seed points provided")
        return None
    if image is None or image.size == 0:
        print(" SAM-prompt: empty image")
        return None

    import torch
    from PIL import Image as PILImage

    h, w = image.shape[:2]

    scaled_bgr, scale_back = _downscale_prompt(image)
    scaled_rgb = cv2.cvtColor(scaled_bgr, cv2.COLOR_BGR2RGB)
    pil = PILImage.fromarray(scaled_rgb)
    scale_down = 1.0 / scale_back  # original → scaled
    sh, sw = scaled_bgr.shape[:2]

    def _to_scaled(pts: List[Tuple[int, int]]) -> List[List[int]]:
        # Rounding can push a border point one pixel past the scaled image,
        # so clamp every prompt point into bounds.
        return [
            [
                max(0, min(sw - 1, int(round(px * scale_down)))),
                max(0, min(sh - 1, int(round(py * scale_down)))),
            ]
            for px, py in pts
        ]

    seeds_scaled = _to_scaled(seed_points)
    negatives_scaled = _to_scaled(negative_points) if negative_points else []

    # Build one prompt per seed; each prompt carries (1 positive + all negatives)
    # input_points shape: [batch=1, num_prompts, points_per_prompt, 2]
    # input_labels shape: [batch=1, num_prompts, points_per_prompt]
    input_points = [[[seed] + negatives_scaled for seed in seeds_scaled]]
    input_labels = [[[1] + [0] * len(negatives_scaled) for _ in seeds_scaled]]

    model, processor = get_sam2()

    t0 = time.time()
    inputs = processor(
        images=pil,
        input_points=input_points,
        input_labels=input_labels,
        return_tensors="pt",
    )
    with torch.inference_mode():
        # multimask_output=True gives 3 masks per seed (small / medium / large
        # disambiguation of the prompt). Empirically this matters for card
        # detection: SAM's single-best IoU mask sometimes latches onto a
        # sub-region or a nearby distractor, but one of the other two
        # candidates is the full card. Scoring cost is fine because we score
        # in the scaled 1024-space, not full resolution.
        outputs = model(**inputs, multimask_output=True)

    # Score masks in the scaled 1024-space. Only the single winner is
    # upscaled to full resolution afterward, which avoids O(N) 12 MP resizes.
    scaled_h = inputs["original_sizes"][0][0].item()
    scaled_w = inputs["original_sizes"][0][1].item()
    scaled_area = float(scaled_h * scaled_w)

    masks_list = processor.post_process_masks(
        outputs.pred_masks.cpu(),
        inputs["original_sizes"],
        mask_threshold=0.0,
    )
    masks_tensor = masks_list[0]  # (num_prompts, num_candidates, H_s, W_s)
    iou_scores = outputs.iou_scores.cpu().numpy()[0]
    infer_time = time.time() - t0

    scored: List[Dict[str, Any]] = []
    scaled_candidate_masks: List[np.ndarray] = []
    for prompt_idx in range(masks_tensor.shape[0]):
        for cand_idx in range(masks_tensor.shape[1]):
            mask_scaled = masks_tensor[prompt_idx, cand_idx].numpy().astype(bool)
            scaled_candidate_masks.append(mask_scaled)
            result = _score_card_mask(mask_scaled, scaled_area)
            if result is not None:
                result["seed_idx"] = prompt_idx
                result["cand_idx"] = cand_idx
                result["iou_score"] = float(iou_scores[prompt_idx, cand_idx])
                result["mask_scaled"] = mask_scaled
                scored.append(result)

    scored.sort(key=lambda d: d["score"], reverse=True)
    best = scored[0] if scored else None

    # Upscale only the winning mask + corners to full resolution. The result
    # goes into a COPY: `best` is the same dict object as scored[0], and the
    # debug overlays below need every entry of `scored` to stay in scaled space.
    best_full: Optional[Dict[str, Any]] = None
    if best is not None:
        mask_scaled_best = best["mask_scaled"]
        if mask_scaled_best.shape != (h, w):
            mask_full = cv2.resize(
                mask_scaled_best.astype(np.uint8), (w, h),
                interpolation=cv2.INTER_NEAREST,
            ).astype(bool)
        else:
            mask_full = mask_scaled_best
        best_full = dict(best)
        best_full["mask"] = mask_full
        best_full["corners"] = best["corners"] * scale_back
        best_full["width"] = best["width"] * scale_back
        best_full["height"] = best["height"] * scale_back

    print(
        f" SAM-prompt: {len(seed_points)} seeds, "
        f"{masks_tensor.shape[0] * masks_tensor.shape[1]} candidates, "
        f"{len(scored)} passed filter, inference={infer_time:.2f}s"
    )

    if debug_dir:
        # Render debug overlays in the downscaled 1024-space. Upscaling
        # ~60 masks to full 12 MP resolution just for PNGs was dominating
        # end-to-end time (8–10s out of ~9s total). The debug images are
        # for human inspection; 1024 is plenty.
        debug_seeds = [(x, y) for x, y in seeds_scaled]
        debug_negs = [(x, y) for x, y in negatives_scaled]
        debug_scored_for_viz = []
        for s in scored:
            s_copy = dict(s)  # corners are still in scaled space
            s_copy["mask"] = s["mask_scaled"]
            debug_scored_for_viz.append(s_copy)
        best_for_viz = None
        if best is not None:
            best_for_viz = dict(best)
            best_for_viz["mask"] = best["mask_scaled"]
        _save_prompt_debug(
            debug_dir, scaled_bgr, debug_seeds, debug_negs,
            scaled_candidate_masks, debug_scored_for_viz, best_for_viz,
        )

    if best_full is None:
        return None

    print(
        f" SAM-prompt card: score={best_full['score']:.3f}, "
        f"aspect={best_full['aspect_ratio']:.3f}, rect={best_full['rectangularity']:.3f}, "
        f"{best_full['width']:.0f}x{best_full['height']:.0f}px (seed {best_full['seed_idx']})"
    )

    return {
        "corners": best_full["corners"],
        "contour": best_full["corners"],
        "confidence": float(best_full["score"]),
        "width_px": float(best_full["width"]),
        "height_px": float(best_full["height"]),
        "aspect_ratio": float(best_full["aspect_ratio"]),
        "mask": best_full["mask"],  # bool HxW, canonical-image coords
        "mask_source": "sam_prompt",
    }
src/sam_hand_segmentation.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SAM 2.1-based hand segmentation.
3
+
4
+ Produces a pixel-accurate hand mask using Meta's Segment Anything 2.1
5
+ (Hiera Tiny) via HuggingFace transformers, seeded by a positive point
6
+ prompt at the palm center (derived from MediaPipe landmarks). Optional
7
+ negative points can steer SAM away from the credit card.
8
+
9
+ This replaces the synthetic convex-hull "mask" produced by
10
+ `finger_segmentation._create_hand_mask()`, which is built from the
11
+ 21 hand landmarks and does not follow the true hand contour.
12
+
13
+ Prompt-based inference: ~0.6s per call on CPU (vs ~18s for AMG).
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import time
19
+ from pathlib import Path
20
+ from typing import List, Optional, Tuple
21
+
22
+ import cv2
23
+ import numpy as np
24
+
25
+ from .sam_backend import INFERENCE_MAX_SIDE, get_sam2
26
+
27
+
28
+ def _downscale(image_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
29
+ """Downscale so the long side is INFERENCE_MAX_SIDE. Returns (scaled, scale_back).
30
+
31
+ `scale_back` is the factor to multiply scaled coords by to get original coords.
32
+ """
33
+ h, w = image_bgr.shape[:2]
34
+ long_side = max(h, w)
35
+ if long_side <= INFERENCE_MAX_SIDE:
36
+ return image_bgr, 1.0
37
+ scale = INFERENCE_MAX_SIDE / long_side
38
+ new_w = int(round(w * scale))
39
+ new_h = int(round(h * scale))
40
+ return cv2.resize(image_bgr, (new_w, new_h), interpolation=cv2.INTER_AREA), 1.0 / scale
41
+
42
+
43
def segment_hand_sam(
    image_bgr: np.ndarray,
    palm_xy: Tuple[int, int],
    negative_points: Optional[List[Tuple[int, int]]] = None,
    debug_dir: Optional[str] = None,
) -> Optional[np.ndarray]:
    """Return a pixel-accurate bool hand mask (H x W) via SAM 2.1 Tiny.

    A single prompt (palm center as positive point plus optional negative
    points) is run on a downscaled copy of the image; of the candidate masks
    returned, the one with the highest predicted IoU score is kept and
    resized back to the input resolution.

    Args:
        image_bgr: Full-resolution BGR image in the canonical orientation.
        palm_xy: (x, y) pixel coordinates of the palm center (positive prompt).
        negative_points: Optional list of (x, y) points to steer SAM away from
            non-hand regions (e.g., credit card center).
        debug_dir: Optional directory to save mask + overlay for inspection.

    Returns:
        Bool mask of the same shape as `image_bgr.shape[:2]`, or None when the
        input image is missing or empty.
    """
    # Guard first so an unusable image yields the documented None instead of
    # an exception from the resize / colour conversion.
    if image_bgr is None or image_bgr.size == 0:
        print(" SAM hand mask: empty image")
        return None

    import torch
    from PIL import Image as PILImage

    h_full, w_full = image_bgr.shape[:2]

    scaled_bgr, scale_back = _downscale(image_bgr)
    scaled_rgb = cv2.cvtColor(scaled_bgr, cv2.COLOR_BGR2RGB)
    pil = PILImage.fromarray(scaled_rgb)

    # Map prompt points into the scaled image space
    scale_down = 1.0 / scale_back  # original -> scaled
    prompt_points = [[int(round(palm_xy[0] * scale_down)), int(round(palm_xy[1] * scale_down))]]
    prompt_labels = [1]
    for nx, ny in negative_points or []:
        prompt_points.append([int(round(nx * scale_down)), int(round(ny * scale_down))])
        prompt_labels.append(0)

    model, processor = get_sam2()

    t0 = time.time()
    inputs = processor(
        images=pil,
        input_points=[[prompt_points]],
        input_labels=[[prompt_labels]],
        return_tensors="pt",
    )
    with torch.inference_mode():
        outputs = model(**inputs, multimask_output=True)

    masks = processor.post_process_masks(
        outputs.pred_masks.cpu(),
        inputs["original_sizes"],
        mask_threshold=0.0,
    )[0][0]  # (num_candidates, H_scaled, W_scaled)
    scores = outputs.iou_scores.cpu().numpy()[0, 0]
    best_idx = int(np.argmax(scores))
    mask_scaled = masks[best_idx].numpy().astype(bool)
    best_score = float(scores[best_idx])
    infer_time = time.time() - t0

    # Upscale back to original resolution
    if mask_scaled.shape != (h_full, w_full):
        mask_full = cv2.resize(
            mask_scaled.astype(np.uint8),
            (w_full, h_full),
            interpolation=cv2.INTER_NEAREST,
        ).astype(bool)
    else:
        mask_full = mask_scaled

    print(
        f" SAM hand mask: score={best_score:.3f} time={infer_time:.1f}s "
        f"area={int(mask_full.sum())}px"
    )

    if debug_dir:
        out_dir = Path(debug_dir)
        out_dir.mkdir(parents=True, exist_ok=True)
        mask_u8 = mask_full.astype(np.uint8) * 255
        # Raw mask
        cv2.imwrite(str(out_dir / "sam_hand_mask.png"), mask_u8)
        # Overlay with prompt points
        overlay = image_bgr.copy()
        tint = np.zeros_like(overlay)
        tint[mask_full] = (0, 255, 255)
        overlay = cv2.addWeighted(overlay, 1.0, tint, 0.35, 0)

        contours, _ = cv2.findContours(mask_u8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        cv2.drawContours(overlay, contours, -1, (0, 255, 255), 3, cv2.LINE_AA)

        # Native ints, same as the negative points below: some OpenCV builds
        # refuse NumPy scalars in point tuples.
        palm_pt = (int(palm_xy[0]), int(palm_xy[1]))
        cv2.circle(overlay, palm_pt, 20, (0, 255, 0), -1)
        cv2.circle(overlay, palm_pt, 20, (0, 0, 0), 3)
        for nx, ny in negative_points or []:
            cv2.circle(overlay, (int(nx), int(ny)), 20, (0, 0, 255), -1)
            cv2.circle(overlay, (int(nx), int(ny)), 20, (0, 0, 0), 3)

        label = f"SAM hand score={best_score:.2f} {infer_time:.1f}s"
        # White outline first, then coloured text on top for legibility.
        cv2.putText(overlay, label, (30, 60), cv2.FONT_HERSHEY_SIMPLEX, 1.1,
                    (255, 255, 255), 5, cv2.LINE_AA)
        cv2.putText(overlay, label, (30, 60), cv2.FONT_HERSHEY_SIMPLEX, 1.1,
                    (0, 255, 255), 2, cv2.LINE_AA)
        cv2.imwrite(str(out_dir / "sam_hand_overlay.png"), overlay)

    return mask_full
149
+
150
+
151
def palm_center_from_landmarks(landmarks_px: np.ndarray) -> Tuple[int, int]:
    """Return (x, y) pixel coord of the palm center from the 21 MediaPipe landmarks.

    Defined as the mean of wrist (0) + four MCPs (5, 9, 13, 17).

    Args:
        landmarks_px: Array-like of landmark rows whose first two columns are
            x and y in pixels. Plain nested lists are accepted as well as
            ndarrays; extra columns (e.g. z) are ignored.

    Returns:
        The rounded (x, y) center as native Python ints.

    Raises:
        IndexError: If fewer than 18 landmark rows are supplied.
    """
    # asarray lets callers pass lists/tuples; fancy indexing below needs an ndarray.
    points = np.asarray(landmarks_px, dtype=float)
    palm_indices = [0, 5, 9, 13, 17]
    center_x, center_y = points[palm_indices, :2].mean(axis=0)
    return (int(round(float(center_x))), int(round(float(center_y))))
web_demo/README.md CHANGED
@@ -25,4 +25,4 @@ Open `http://localhost:8000`.
25
  - Debug overlay auto-generated per request
26
  - Default guided sample image is at `web_demo/static/examples/default_sample.jpg`
27
  - `Start Measurement` uses the default sample image when no upload is selected
28
- - Web demo enforces Sobel edge refinement only (`edge_method=sobel`)
 
25
  - Debug overlay auto-generated per request
26
  - Default guided sample image is at `web_demo/static/examples/default_sample.jpg`
27
  - `Start Measurement` uses the default sample image when no upload is selected
28
+ - Web demo enforces SAM-mask boundary edge detection only (`edge_method=mask`)
web_demo/app.py CHANGED
@@ -36,7 +36,9 @@ RESULTS_DIR = APP_ROOT / "results"
36
  DEFAULT_SAMPLE_PATH = APP_ROOT / "static" / "examples" / "default_sample.jpg"
37
  DEFAULT_SAMPLE_URL = "/static/examples/default_sample.jpg"
38
  ALLOWED_EXTENSIONS = {".jpg", ".jpeg", ".png"}
39
- DEMO_EDGE_METHOD = "sobel"
 
 
40
 
41
  app = Flask(__name__)
42
 
@@ -233,6 +235,8 @@ def _run_measurement(
233
  image=image,
234
  finger_index=finger_index,
235
  edge_method=DEMO_EDGE_METHOD,
 
 
236
  result_png_path=str(result_png_path),
237
  save_debug=False,
238
  ring_model=ring_model,
@@ -313,6 +317,8 @@ def _run_multi_measurement(
313
  result = measure_multi_finger(
314
  image=image,
315
  edge_method=DEMO_EDGE_METHOD,
 
 
316
  result_png_path=str(result_png_path),
317
  save_debug=False,
318
  no_calibration=False,
 
36
  DEFAULT_SAMPLE_PATH = APP_ROOT / "static" / "examples" / "default_sample.jpg"
37
  DEFAULT_SAMPLE_URL = "/static/examples/default_sample.jpg"
38
  ALLOWED_EXTENSIONS = {".jpg", ".jpeg", ".png"}
39
+ DEMO_EDGE_METHOD = "mask"
40
+ DEMO_CARD_METHOD = "sam"
41
+ DEMO_HAND_MASK_METHOD = "sam"
42
 
43
  app = Flask(__name__)
44
 
 
235
  image=image,
236
  finger_index=finger_index,
237
  edge_method=DEMO_EDGE_METHOD,
238
+ card_method=DEMO_CARD_METHOD,
239
+ hand_mask_method=DEMO_HAND_MASK_METHOD,
240
  result_png_path=str(result_png_path),
241
  save_debug=False,
242
  ring_model=ring_model,
 
317
  result = measure_multi_finger(
318
  image=image,
319
  edge_method=DEMO_EDGE_METHOD,
320
+ card_method=DEMO_CARD_METHOD,
321
+ hand_mask_method=DEMO_HAND_MASK_METHOD,
322
  result_png_path=str(result_png_path),
323
  save_debug=False,
324
  no_calibration=False,
web_demo/static/app.js CHANGED
@@ -112,7 +112,7 @@ const buildMeasureSettings = () => {
112
  const aiOn = aiToggle ? (aiToggle.type === "checkbox" ? aiToggle.checked : true) : false;
113
  return {
114
  finger_index: fingerSelect ? fingerSelect.value : "index",
115
- edge_method: "sobel",
116
  mode: mode,
117
  ring_model: ringModel,
118
  ai_explain: aiOn ? "1" : "0",
 
112
  const aiOn = aiToggle ? (aiToggle.type === "checkbox" ? aiToggle.checked : true) : false;
113
  return {
114
  finger_index: fingerSelect ? fingerSelect.value : "index",
115
+ edge_method: "mask",
116
  mode: mode,
117
  ring_model: ringModel,
118
  ai_explain: aiOn ? "1" : "0",
web_demo/supabase_client.py CHANGED
@@ -19,12 +19,24 @@ _initialized = False
19
 
20
 
21
  def _get_client():
22
- """Lazy-init Supabase client. Returns None if env vars missing."""
 
 
 
 
 
 
 
23
  global _client, _initialized
24
  if _initialized:
25
  return _client
26
  _initialized = True
27
 
 
 
 
 
 
28
  url = os.environ.get("SUPABASE_URL", "").strip()
29
  key = os.environ.get("SUPABASE_SERVICE_KEY", "").strip()
30
  if not url or not key:
 
19
 
20
 
21
  def _get_client():
22
+ """Lazy-init Supabase client. Returns None if persistence is disabled.
23
+
24
+ Persistence is disabled when either:
25
+ - SUPABASE_URL / SUPABASE_SERVICE_KEY is missing, or
26
+ - RING_DISABLE_SUPABASE is set to a truthy value (explicit opt-out, so
27
+ local dev sessions don't upload photos + result PNGs to the real
28
+ bucket on every request).
29
+ """
30
  global _client, _initialized
31
  if _initialized:
32
  return _client
33
  _initialized = True
34
 
35
+ disable = os.environ.get("RING_DISABLE_SUPABASE", "").strip().lower()
36
+ if disable in ("1", "true", "yes", "on"):
37
+ logger.info("RING_DISABLE_SUPABASE set — persistence disabled")
38
+ return None
39
+
40
  url = os.environ.get("SUPABASE_URL", "").strip()
41
  key = os.environ.get("SUPABASE_SERVICE_KEY", "").strip()
42
  if not url or not key: