Spaces:

feng-x
/

ring-sizer

Running

App Files Files Community

feng-x committed on Apr 14

Commit

22df1ea

verified ·

1 Parent(s): 31bf511

Upload folder using huggingface_hub

Browse files

Files changed (14) hide show

measure_finger.py +338 -42
requirements.txt +5 -0
script/compare_hand_sam.py +227 -0
script/validate_sam_card.py +198 -0
src/edge_refinement.py +168 -90
src/finger_segmentation.py +32 -3
src/geometry.py +27 -19
src/sam_backend.py +50 -0
src/sam_card_detection.py +614 -0
src/sam_hand_segmentation.py +158 -0
web_demo/README.md +1 -1
web_demo/app.py +7 -1
web_demo/static/app.js +1 -1
web_demo/supabase_client.py +13 -1

measure_finger.py CHANGED Viewed

@@ -13,13 +13,17 @@ import argparse
 import json
 import sys
 from pathlib import Path
-from typing import Optional, Dict, Any, Literal
 import cv2
 import numpy as np
 from src.image_quality import assess_image_quality
 from src.card_detection import detect_credit_card, compute_scale_factor
 from src.finger_segmentation import segment_hand, isolate_finger, clean_mask, get_finger_contour
 from src.geometry import estimate_finger_axis, localize_ring_zone, localize_ring_zone_from_landmarks, compute_cross_section_width
 from src.edge_refinement import refine_edges_sobel, should_use_sobel_measurement, compare_edge_methods
@@ -30,7 +34,7 @@ from src.confidence import (
     compute_edge_quality_confidence,
     compute_overall_confidence,
 )
-from src.debug_observer import draw_comprehensive_edge_overlay
 from src.ring_size import recommend_ring_size, aggregate_ring_sizes, VALID_RING_MODELS, DEFAULT_RING_MODEL
 from src.image_quality import (
     check_card_in_frame,
@@ -114,9 +118,9 @@ Examples:
     parser.add_argument(
         "--edge-method",
         type=str,
-        default="auto",
-        choices=["auto", "contour", "sobel", "compare"],
-        help="Edge detection method: auto (quality-based), contour (v0), sobel (v1), compare (both) (default: auto)",
     )
     parser.add_argument(
         "--sobel-threshold",
@@ -168,6 +172,20 @@ Examples:
         action="store_true",
         help="[TESTING ONLY] Skip card detection and use dummy scale (allows testing finger segmentation without card)",
     )
     return parser.parse_args()
@@ -270,6 +288,162 @@ def save_output(output: Dict[str, Any], output_path: str) -> None:
         json.dump(output, f, indent=2)
 def measure_finger(
     image: np.ndarray,
     finger_index: FingerIndex = "index",
@@ -277,11 +451,13 @@ def measure_finger(
     save_intermediate: bool = False,
     result_png_path: Optional[str] = None,
     save_debug: bool = False,
-    edge_method: str = "auto",
     sobel_threshold: float = 15.0,
     sobel_kernel_size: int = 3,
     use_subpixel: bool = True,
     skip_card_detection: bool = False,
     ring_model: str = DEFAULT_RING_MODEL,
 ) -> Dict[str, Any]:
     """
@@ -302,16 +478,14 @@ def measure_finger(
     Returns:
         Output dictionary with measurement results
     """
-    # Phase 2: Image quality check
     quality = assess_image_quality(image)
     print(f"Image quality: blur={quality['blur_score']:.1f}, "
           f"brightness={quality['brightness']:.1f}, "
           f"contrast={quality['contrast']:.1f}")
     if not quality["passed"]:
         for issue in quality["issues"]:
-            print(f"  Warning: {issue}")
-        return create_output(fail_reason=quality["fail_reason"])
     # Phase 3: Hand & finger segmentation (MOVED BEFORE CARD DETECTION)
     # This allows us to rotate the image to canonical orientation first
@@ -320,7 +494,12 @@ def measure_finger(
     if save_debug and result_png_path is not None:
         finger_debug_dir = str(Path(result_png_path).parent / "finger_segmentation_debug")
-    hand_data = segment_hand(image, finger=finger_index, debug_dir=finger_debug_dir)
     if hand_data is None:
         print("No hand detected in image")
@@ -358,7 +537,12 @@ def measure_finger(
         view_angle_ok = True
         card_detected = False
     else:
-        card_result = detect_credit_card(image_canonical, debug_dir=card_debug_dir)
         if card_result is None:
             print("Credit card not detected in image")
@@ -390,6 +574,12 @@ def measure_finger(
     # Phase 5: Finger isolation (hand already segmented in Phase 3)
     h_can, w_can = image_canonical.shape[:2]
     finger_data = isolate_finger(hand_data, finger=finger_index, image_shape=(h_can, w_can))
     if finger_data is None:
@@ -497,6 +687,16 @@ def measure_finger(
             borderValue=0
         )
         print(f"Rotation applied: {angle_from_vertical:.1f}° CW, finger now vertical")
     else:
         print(f"Finger axis is {angle_from_vertical:.1f}° from vertical (within {rotation_threshold}° threshold, no rotation needed)")
@@ -560,15 +760,33 @@ def measure_finger(
     sobel_measurement = None
     sobel_failed = False
-    if edge_method in ["sobel", "auto", "compare"]:
         try:
-            print(f"Running Sobel edge refinement (threshold={sobel_threshold}, kernel={sobel_kernel_size})...")
             # Create debug directory for edge refinement if debug enabled
             edge_debug_dir = None
             if save_debug and result_png_path is not None:
                 edge_debug_dir = str(Path(result_png_path).parent / "edge_refinement_debug")
             sobel_measurement = refine_edges_sobel(
                 image=image_canonical,  # Use canonical orientation
                 axis_data=axis_data,
@@ -578,27 +796,29 @@ def measure_finger(
                 sobel_threshold=sobel_threshold,
                 kernel_size=sobel_kernel_size,
                 use_subpixel=use_subpixel,
                 debug_dir=edge_debug_dir,
             )
             sobel_width_cm = sobel_measurement["median_width_cm"]
-            print(f"Sobel width: {sobel_width_cm:.4f}cm "
                   f"({sobel_measurement['num_samples']} samples, "
                   f"std={sobel_measurement['std_width_px']:.2f}px, "
                   f"quality={sobel_measurement['edge_quality']['overall_score']:.3f})")
         except Exception as e:
-            print(f"Sobel edge refinement failed: {e}")
             sobel_failed = True
-            if edge_method == "sobel":
-                # User explicitly requested Sobel, fail if it doesn't work
                 return create_output(
                     card_detected=card_detected,
                     finger_detected=True,
                     scale_px_per_cm=px_per_cm,
                     view_angle_ok=view_angle_ok,
                     fail_reason="sobel_edge_refinement_failed",
-                    edge_method_used="sobel",
                 )
     # Select measurement method based on edge_method flag
@@ -616,6 +836,12 @@ def measure_finger(
         median_width_cm = sobel_measurement["median_width_cm"]
         edge_method_used = "sobel"
     elif edge_method == "auto":
         # Automatic selection based on quality
         if sobel_measurement and not sobel_failed:
@@ -684,7 +910,7 @@ def measure_finger(
     # Calculate edge quality confidence (v1)
     edge_quality_conf = None
-    if edge_method_used in ["sobel", "compare"]:
         edge_quality_conf = compute_edge_quality_confidence(
             final_measurement.get("edge_quality")
         )
@@ -694,7 +920,7 @@ def measure_finger(
         card_conf,
         finger_conf,
         measurement_conf,
-        edge_method="sobel" if edge_method_used in ["sobel", "compare"] else "contour",
         edge_quality_confidence=edge_quality_conf,
     )
@@ -717,7 +943,7 @@ def measure_finger(
         print(f"Generating result visualization...")
         # Use comprehensive edge overlay (based on Sobel data) + card bounding box
-        if edge_method_used in ["sobel", "compare"] and sobel_measurement and not sobel_failed:
             edge_data = sobel_measurement["edge_data"]
             roi_bounds = sobel_measurement["roi_data"]["roi_bounds"]
             width_data = sobel_measurement["width_data"]
@@ -747,6 +973,25 @@ def measure_finger(
             # Fallback: plain image with axis/zone annotations when Sobel unavailable
             debug_image = image_canonical.copy()
         # Draw card bounding box (transform corners if image was rotated)
         if card_result is not None and "corners" in card_result:
             corners = card_result["corners"]
@@ -758,9 +1003,8 @@ def measure_finger(
                 cv2.polylines(debug_image, [pts], isClosed=True,
                               color=(0, 255, 0), thickness=3, lineType=cv2.LINE_AA)
-        # Save result image
-        Path(result_png_path).parent.mkdir(parents=True, exist_ok=True)
-        cv2.imwrite(result_png_path, debug_image)
         print(f"Result visualization saved to: {result_png_path}")
@@ -789,7 +1033,7 @@ def _measure_single_finger_from_shared(
     view_angle_ok: bool,
     card_result: Optional[Dict[str, Any]],
     scale_confidence: float,
-    edge_method: str = "sobel",
     sobel_threshold: float = 15.0,
     sobel_kernel_size: int = 3,
     use_subpixel: bool = True,
@@ -807,6 +1051,7 @@ def _measure_single_finger_from_shared(
     )
     h_can, w_can = image_canonical.shape[:2]
     finger_data = isolate_finger(hand_data, finger=finger_name, image_shape=(h_can, w_can))
     if finger_data is None:
@@ -858,6 +1103,11 @@ def _measure_single_finger_from_shared(
             cleaned_mask, rotation_matrix, (w_can, h_can),
             flags=cv2.INTER_NEAREST, borderMode=cv2.BORDER_CONSTANT, borderValue=0,
         )
     # Ring zone
     try:
@@ -891,21 +1141,34 @@ def _measure_single_finger_from_shared(
     # Sobel measurement
     sobel_measurement = None
     sobel_failed = False
-    if edge_method in ["sobel", "auto", "compare"]:
         try:
             sobel_measurement = refine_edges_sobel(
                 image=img_work, axis_data=axis_data, zone_data=zone_data,
                 scale_px_per_cm=px_per_cm, finger_landmarks=finger_data.get("landmarks"),
                 sobel_threshold=sobel_threshold, kernel_size=sobel_kernel_size,
                 use_subpixel=use_subpixel,
             )
         except Exception:
             sobel_failed = True
-            if edge_method == "sobel":
                 return create_output(
                     card_detected=card_detected, finger_detected=True,
                     scale_px_per_cm=px_per_cm, view_angle_ok=view_angle_ok,
-                    fail_reason="sobel_edge_refinement_failed", edge_method_used="sobel",
                 )
     # Select method
@@ -917,6 +1180,10 @@ def _measure_single_finger_from_shared(
         median_width_cm = sobel_measurement["median_width_cm"]
         edge_method_used = "sobel"
         final_measurement = sobel_measurement
     elif edge_method == "auto":
         if sobel_measurement and not sobel_failed:
             should_use, _ = should_use_sobel_measurement(sobel_measurement, contour_measurement)
@@ -947,11 +1214,11 @@ def _measure_single_finger_from_shared(
     finger_conf = compute_finger_confidence(hand_data, finger_data, mask_area, image_area)
     measurement_conf = compute_measurement_confidence(final_measurement, median_width_cm)
     edge_quality_conf = None
-    if edge_method_used in ["sobel", "compare"]:
         edge_quality_conf = compute_edge_quality_confidence(final_measurement.get("edge_quality"))
     confidence_breakdown = compute_overall_confidence(
         card_conf, finger_conf, measurement_conf,
-        edge_method="sobel" if edge_method_used in ["sobel", "compare"] else "contour",
         edge_quality_confidence=edge_quality_conf,
     )
@@ -978,12 +1245,14 @@ def measure_multi_finger(
     confidence_threshold: float = 0.7,
     result_png_path: Optional[str] = None,
     save_debug: bool = False,
-    edge_method: str = "sobel",
     sobel_threshold: float = 15.0,
     sobel_kernel_size: int = 3,
     use_subpixel: bool = True,
     skip_card_detection: bool = False,
     no_calibration: bool = False,
     ring_model: str = DEFAULT_RING_MODEL,
 ) -> Dict[str, Any]:
     """Measure index, middle, and ring fingers from a single image.
@@ -996,14 +1265,13 @@ def measure_multi_finger(
     """
     from src.finger_segmentation import FINGER_LANDMARKS
-    # Phase 1: Image quality
     quality = assess_image_quality(image)
     print(f"[multi] Image quality: blur={quality['blur_score']:.1f}, "
           f"brightness={quality['brightness']:.1f}, contrast={quality['contrast']:.1f}")
     if not quality["passed"]:
         for issue in quality["issues"]:
-            print(f"  Warning: {issue}")
-        return {"fail_reason": quality["fail_reason"], "per_finger": {}, "fingers_measured": 0, "fingers_succeeded": 0}
     # Lighting uniformity check
     lighting = check_lighting_uniformity(image)
@@ -1015,7 +1283,12 @@ def measure_multi_finger(
     if save_debug and result_png_path is not None:
         finger_debug_dir = str(Path(result_png_path).parent / "finger_segmentation_debug")
-    hand_data = segment_hand(image, finger="index", debug_dir=finger_debug_dir)
     if hand_data is None:
         print("[multi] No hand detected")
         return {"fail_reason": "hand_not_detected", "per_finger": {}, "fingers_measured": 0, "fingers_succeeded": 0}
@@ -1035,7 +1308,12 @@ def measure_multi_finger(
         view_angle_ok = True
         card_detected = False
     else:
-        card_result = detect_credit_card(image_canonical, debug_dir=card_debug_dir)
         if card_result is None:
             return {"fail_reason": "card_not_detected", "per_finger": {}, "fingers_measured": 0, "fingers_succeeded": 0}
         px_per_cm, scale_confidence = compute_scale_factor(card_result["corners"])
@@ -1113,6 +1391,8 @@ def measure_multi_finger(
             card_result=card_result,
             px_per_cm=px_per_cm,
             result_png_path=result_png_path,
         )
     # Clean internal data from output
@@ -1136,6 +1416,8 @@ def _draw_multi_finger_debug(
     card_result: Optional[Dict[str, Any]],
     px_per_cm: float,
     result_png_path: str,
 ) -> None:
     """Generate debug visualization for multi-finger measurement.
@@ -1154,7 +1436,18 @@ def _draw_multi_finger_debug(
     vis = image_canonical.copy()
     h, w = vis.shape[:2]
-    # Draw card
     if card_result is not None:
         vis = draw_card_overlay(vis, card_result, px_per_cm)
@@ -1227,8 +1520,7 @@ def _draw_multi_finger_debug(
                              Color.GREEN, 1, cv2.LINE_AA)
                 count += 1
-    Path(result_png_path).parent.mkdir(parents=True, exist_ok=True)
-    cv2.imwrite(result_png_path, vis)
     print(f"\n[multi] Debug visualization saved to: {result_png_path}")
@@ -1266,6 +1558,8 @@ def main() -> int:
             use_subpixel=not args.no_subpixel,
             skip_card_detection=args.skip_card_detection,
             no_calibration=args.no_calibration,
             ring_model=args.ring_model,
         )
@@ -1301,6 +1595,8 @@ def main() -> int:
         sobel_kernel_size=args.sobel_kernel_size,
         use_subpixel=not args.no_subpixel,
         skip_card_detection=args.skip_card_detection,
         ring_model=args.ring_model,
     )

 import json
 import sys
 from pathlib import Path
+from typing import Optional, Dict, Any, List, Literal, Tuple
 import cv2
 import numpy as np
 from src.image_quality import assess_image_quality
 from src.card_detection import detect_credit_card, compute_scale_factor
+from src.sam_card_detection import (
+    detect_credit_card_sam_prompt,
+    suggest_card_seeds,
+)
 from src.finger_segmentation import segment_hand, isolate_finger, clean_mask, get_finger_contour
 from src.geometry import estimate_finger_axis, localize_ring_zone, localize_ring_zone_from_landmarks, compute_cross_section_width
 from src.edge_refinement import refine_edges_sobel, should_use_sobel_measurement, compare_edge_methods
     compute_edge_quality_confidence,
     compute_overall_confidence,
 )
+from src.debug_observer import draw_comprehensive_edge_overlay, draw_hand_skeleton
 from src.ring_size import recommend_ring_size, aggregate_ring_sizes, VALID_RING_MODELS, DEFAULT_RING_MODEL
 from src.image_quality import (
     check_card_in_frame,
     parser.add_argument(
         "--edge-method",
         type=str,
+        default="mask",
+        choices=["auto", "contour", "sobel", "mask", "compare"],
+        help="Edge detection method: auto (quality-based), contour (v0), sobel (pure Sobel gradient, no SAM mask), mask (SAM mask boundary only, no Sobel), compare (both) (default: mask)",
     )
     parser.add_argument(
         "--sobel-threshold",
         action="store_true",
         help="[TESTING ONLY] Skip card detection and use dummy scale (allows testing finger segmentation without card)",
     )
+    parser.add_argument(
+        "--card-method",
+        type=str,
+        choices=["classic", "sam"],
+        default="classic",
+        help="Card detection backend: 'classic' (Canny/adaptive/Otsu/color waterfall) or 'sam' (SAM 2.1 mask segmentation). Default: classic.",
+    )
+    parser.add_argument(
+        "--hand-mask",
+        type=str,
+        choices=["synthetic", "sam"],
+        default="sam",
+        help="Hand mask source: 'synthetic' (MediaPipe landmark convex hull) or 'sam' (SAM 2.1 pixel-accurate). Default: sam.",
+    )
     return parser.parse_args()
         json.dump(output, f, indent=2)
+# Debug visualisations are for human inspection, so there's no reason to
+# write a 12-megapixel PNG (encoding alone can take 1–2s on CPU). Cap the
+# long side and encode as JPEG — the on-disk path keeps its .png extension
+# for backwards compat with existing callers, but the file contents are
+# always JPEG bytes (whether or not a downscale was needed) to keep
+# encoding well under ~100ms.
+_DEBUG_VIS_MAX_LONG_SIDE = 1600
+def _overlay_hand_skeleton(
+    image: np.ndarray,
+    landmarks: Optional[np.ndarray],
+    rotation_matrix: Optional[np.ndarray] = None,
+) -> np.ndarray:
+    """Draw the 21-point MediaPipe hand skeleton onto a debug image.
+    Landmarks are assumed to be in the canonical-image frame. If a precise
+    rotation was applied to align the finger vertically, pass the same
+    rotation_matrix so the skeleton lands on the rotated image.
+    Args:
+        image: Debug image to annotate.
+        landmarks: Hand landmarks; when None or shorter than 21 entries the
+            overlay is skipped.
+        rotation_matrix: Optional transform applied to the landmarks (via
+            ``transform_points_rotation``) before they are drawn.
+    Returns:
+        ``image`` itself when the overlay is skipped, otherwise whatever
+        ``draw_hand_skeleton`` returns for the (possibly rotated) points.
+    """
+    if landmarks is None or len(landmarks) < 21:
+        return image
+    pts = np.asarray(landmarks, dtype=np.float64)
+    if rotation_matrix is not None:
+        from src.geometry import transform_points_rotation  # imported lazily, only when a rotation is supplied
+        pts = transform_points_rotation(pts, rotation_matrix)
+    return draw_hand_skeleton(image, pts)
+def _overlay_sam_masks(
+    image: np.ndarray,
+    hand_mask: Optional[np.ndarray] = None,
+    card_mask: Optional[np.ndarray] = None,
+    rotation_matrix: Optional[np.ndarray] = None,
+) -> np.ndarray:
+    """Tint the SAM hand and card masks onto a debug image.
+    Hand mask is rendered in cyan, card mask in green. Both are drawn as
+    semi-transparent fills plus a solid contour so the pixel-accurate SAM
+    silhouettes remain visible underneath downstream finger/edge overlays.
+    If ``rotation_matrix`` is supplied (because the caller applied a precise
+    finger-alignment rotation to the canonical image before this call), the
+    masks are rotated to match so they stay aligned with the image.
+    Args:
+        image: Debug image to annotate; it is copied before drawing.
+        hand_mask: Optional hand mask; any pixel > 0 counts as foreground.
+        card_mask: Optional card mask; any pixel > 0 counts as foreground.
+        rotation_matrix: Optional affine matrix passed to ``cv2.warpAffine``
+            for each mask.
+    Returns:
+        ``image`` itself (not a copy) when both masks are None, otherwise a
+        new image with the tints and contours drawn on it.
+    """
+    if hand_mask is None and card_mask is None:
+        return image
+    h, w = image.shape[:2]
+    out = image.copy()
+    def _prepare(mask: np.ndarray) -> Optional[np.ndarray]:  # also accepts None, despite the annotation
+        if mask is None:
+            return None
+        if mask.dtype != np.uint8:
+            m = (mask > 0).astype(np.uint8) * 255  # binarise bool/float/int masks to 0/255
+        else:
+            m = mask.copy()
+        if m.shape[:2] != (h, w):
+            m = cv2.resize(m, (w, h), interpolation=cv2.INTER_NEAREST)  # nearest-neighbour resize to the image size
+        if rotation_matrix is not None:
+            m = cv2.warpAffine(
+                m, rotation_matrix, (w, h),
+                flags=cv2.INTER_NEAREST,
+                borderMode=cv2.BORDER_CONSTANT,
+                borderValue=0,
+            )
+        return m
+    hand_u8 = _prepare(hand_mask)
+    card_u8 = _prepare(card_mask)
+    # Semi-transparent fills: the tint image is zero outside the mask, so
+    # addWeighted leaves those pixels unchanged and brightens the masked ones.
+    if hand_u8 is not None:
+        tint = np.zeros_like(out)
+        tint[hand_u8 > 0] = (255, 255, 0)  # cyan in BGR
+        out = cv2.addWeighted(out, 1.0, tint, 0.18, 0)
+    if card_u8 is not None:
+        tint = np.zeros_like(out)
+        tint[card_u8 > 0] = (0, 255, 0)    # green in BGR
+        out = cv2.addWeighted(out, 1.0, tint, 0.22, 0)
+    # Solid contours to emphasize the SAM-derived silhouette
+    if hand_u8 is not None:
+        contours, _ = cv2.findContours(hand_u8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+        cv2.drawContours(out, contours, -1, (255, 255, 0), 2, cv2.LINE_AA)
+    if card_u8 is not None:
+        contours, _ = cv2.findContours(card_u8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+        cv2.drawContours(out, contours, -1, (0, 255, 0), 2, cv2.LINE_AA)
+    return out
+def _save_debug_visualization(path: str, image: np.ndarray) -> None:
+    """Downscale + fast-encode a debug overlay image.
+    The web demo and validation scripts all consume this just for display,
+    so we trade 12 MP PNG encoding (~1–2s) for a ~1600 px JPEG (~50ms)
+    without changing the output file path.
+    The image is resized only when its long side exceeds
+    ``_DEBUG_VIS_MAX_LONG_SIDE``, but the bytes written to ``path`` are
+    JPEG-encoded in every case, whatever extension ``path`` carries. If the
+    JPEG encode fails, ``cv2.imwrite`` is used instead.
+    Args:
+        path: Destination file path; parent directories are created.
+        image: Image to save.
+    """
+    Path(path).parent.mkdir(parents=True, exist_ok=True)
+    h, w = image.shape[:2]
+    long_side = max(h, w)
+    if long_side > _DEBUG_VIS_MAX_LONG_SIDE:
+        scale = _DEBUG_VIS_MAX_LONG_SIDE / long_side
+        new_size = (int(round(w * scale)), int(round(h * scale)))  # (width, height) order for cv2.resize
+        image = cv2.resize(image, new_size, interpolation=cv2.INTER_AREA)
+    # JPEG is ~20× faster than PNG to encode at this size and visually
+    # indistinguishable for debug overlays.
+    ok, buf = cv2.imencode(".jpg", image, [int(cv2.IMWRITE_JPEG_QUALITY), 85])
+    if not ok:
+        cv2.imwrite(path, image)  # fallback to whatever imwrite picks from ext
+        return
+    with open(path, "wb") as f:
+        f.write(buf.tobytes())
+def _sam_card_detect(
+    image_canonical: np.ndarray,
+    hand_data: Dict[str, Any],
+    save_debug: bool,
+    result_png_path: Optional[str],
+) -> Optional[Dict[str, Any]]:
+    """Run prompt-based SAM card detection.
+    No AMG fallback: empirically, if the 5x5 prompt grid doesn't find the
+    card, AMG won't either, and the ~20s AMG retry is pure cost. Returns
+    the card dict or None on failure.
+    Args:
+        image_canonical: Image to search for the card.
+        hand_data: Hand segmentation result; the ``"mask"`` and
+            ``"landmarks"`` entries are read.
+        save_debug: When true (and ``result_png_path`` is given), debug
+            output goes to a ``sam_card_prompt_debug`` directory next to
+            the result image.
+        result_png_path: Path of the result image, used only to locate the
+            debug directory.
+    Returns:
+        The result of ``detect_credit_card_sam_prompt``, or None when the
+        hand mask is missing or no seed points are suggested.
+    """
+    debug_root = (
+        Path(result_png_path).parent if (save_debug and result_png_path is not None) else None
+    )
+    hand_mask = hand_data.get("mask")
+    landmarks = hand_data.get("landmarks")
+    if hand_mask is None:
+        return None
+    seeds = suggest_card_seeds(hand_mask, image_canonical.shape[:2])
+    if not seeds:
+        return None
+    negatives: List[Tuple[int, int]] = []  # stays empty when no landmarks are available
+    if landmarks is not None:
+        palm_idx = [0, 5, 9, 13, 17]  # presumably wrist + finger-base joints in MediaPipe indexing — verify
+        palm_c = np.mean(landmarks[palm_idx, :2], axis=0)  # assumes landmarks is a NumPy array with >= 18 rows — TODO confirm
+        negatives.append((int(round(palm_c[0])), int(round(palm_c[1]))))
+    prompt_debug = str(debug_root / "sam_card_prompt_debug") if debug_root else None
+    return detect_credit_card_sam_prompt(
+        image_canonical,
+        seed_points=seeds,
+        negative_points=negatives,
+        debug_dir=prompt_debug,
+    )
 def measure_finger(
     image: np.ndarray,
     finger_index: FingerIndex = "index",
     save_intermediate: bool = False,
     result_png_path: Optional[str] = None,
     save_debug: bool = False,
+    edge_method: str = "mask",
     sobel_threshold: float = 15.0,
     sobel_kernel_size: int = 3,
     use_subpixel: bool = True,
     skip_card_detection: bool = False,
+    card_method: str = "classic",
+    hand_mask_method: str = "sam",
     ring_model: str = DEFAULT_RING_MODEL,
 ) -> Dict[str, Any]:
     """
     Returns:
         Output dictionary with measurement results
     """
+    # Phase 2: Image quality metrics (informational only — no hard fail)
     quality = assess_image_quality(image)
     print(f"Image quality: blur={quality['blur_score']:.1f}, "
           f"brightness={quality['brightness']:.1f}, "
           f"contrast={quality['contrast']:.1f}")
     if not quality["passed"]:
         for issue in quality["issues"]:
+            print(f"  Note: {issue}")
     # Phase 3: Hand & finger segmentation (MOVED BEFORE CARD DETECTION)
     # This allows us to rotate the image to canonical orientation first
     if save_debug and result_png_path is not None:
         finger_debug_dir = str(Path(result_png_path).parent / "finger_segmentation_debug")
+    hand_data = segment_hand(
+        image,
+        finger=finger_index,
+        debug_dir=finger_debug_dir,
+        use_sam_mask=(hand_mask_method == "sam"),
+    )
     if hand_data is None:
         print("No hand detected in image")
         view_angle_ok = True
         card_detected = False
     else:
+        if card_method == "sam":
+            card_result = _sam_card_detect(
+                image_canonical, hand_data, save_debug, result_png_path
+            )
+        else:
+            card_result = detect_credit_card(image_canonical, debug_dir=card_debug_dir)
         if card_result is None:
             print("Credit card not detected in image")
     # Phase 5: Finger isolation (hand already segmented in Phase 3)
     h_can, w_can = image_canonical.shape[:2]
+    # Keep a reference to the raw SAM hand mask (pre-isolation polygon clip).
+    # mask_only edge detection needs the untrimmed silhouette — the isolation
+    # polygon in _create_finger_roi_mask is only ~1.08x the landmark segment
+    # length and can cut into a wider-than-average finger, which would make
+    # the mask boundary narrower than the true SAM boundary.
+    raw_hand_mask = hand_data.get("mask")
     finger_data = isolate_finger(hand_data, finger=finger_index, image_shape=(h_can, w_can))
     if finger_data is None:
             borderValue=0
         )
+        # Also warp the raw SAM hand mask so mask_only mode can read the
+        # untrimmed silhouette in the same rotated frame as the image.
+        if raw_hand_mask is not None:
+            raw_hand_mask = cv2.warpAffine(
+                raw_hand_mask, rotation_matrix, (w_can, h_can),
+                flags=cv2.INTER_NEAREST,
+                borderMode=cv2.BORDER_CONSTANT,
+                borderValue=0,
+            )
         print(f"Rotation applied: {angle_from_vertical:.1f}° CW, finger now vertical")
     else:
         print(f"Finger axis is {angle_from_vertical:.1f}° from vertical (within {rotation_threshold}° threshold, no rotation needed)")
     sobel_measurement = None
     sobel_failed = False
+    if edge_method in ["sobel", "mask", "auto", "compare"]:
         try:
+            # Pure Sobel mode drops the SAM mask; pure mask mode uses the SAM
+            # boundary directly without gradient snapping; auto/compare stay
+            # on the legacy hybrid path that combines both.
+            if edge_method == "sobel":
+                mask_mode = "sobel_only"
+            elif edge_method == "mask":
+                mask_mode = "mask_only"
+            else:
+                mask_mode = "hybrid"
+            print(f"Running edge refinement (mode={mask_mode}, threshold={sobel_threshold}, kernel={sobel_kernel_size})...")
             # Create debug directory for edge refinement if debug enabled
             edge_debug_dir = None
             if save_debug and result_png_path is not None:
                 edge_debug_dir = str(Path(result_png_path).parent / "edge_refinement_debug")
+            # mask_only reads boundaries directly from the mask, so it needs
+            # the *raw* SAM silhouette. The hybrid/sobel_only paths keep the
+            # isolation-trimmed mask they were validated against.
+            if mask_mode == "mask_only" and raw_hand_mask is not None:
+                edge_mask_input = raw_hand_mask
+            else:
+                edge_mask_input = cleaned_mask
             sobel_measurement = refine_edges_sobel(
                 image=image_canonical,  # Use canonical orientation
                 axis_data=axis_data,
                 sobel_threshold=sobel_threshold,
                 kernel_size=sobel_kernel_size,
                 use_subpixel=use_subpixel,
+                finger_mask=edge_mask_input,
                 debug_dir=edge_debug_dir,
+                mask_mode=mask_mode,
+                finger_name=finger_data.get("finger_name"),
             )
             sobel_width_cm = sobel_measurement["median_width_cm"]
+            print(f"Edge width: {sobel_width_cm:.4f}cm "
                   f"({sobel_measurement['num_samples']} samples, "
                   f"std={sobel_measurement['std_width_px']:.2f}px, "
                   f"quality={sobel_measurement['edge_quality']['overall_score']:.3f})")
         except Exception as e:
+            print(f"Edge refinement failed: {e}")
             sobel_failed = True
+            if edge_method in ("sobel", "mask"):
                 return create_output(
                     card_detected=card_detected,
                     finger_detected=True,
                     scale_px_per_cm=px_per_cm,
                     view_angle_ok=view_angle_ok,
                     fail_reason="sobel_edge_refinement_failed",
+                    edge_method_used=edge_method,
                 )
     # Select measurement method based on edge_method flag
         median_width_cm = sobel_measurement["median_width_cm"]
         edge_method_used = "sobel"
+    elif edge_method == "mask":
+        # Use SAM-mask boundary directly (already handled failure case above)
+        final_measurement = sobel_measurement
+        median_width_cm = sobel_measurement["median_width_cm"]
+        edge_method_used = "mask"
     elif edge_method == "auto":
         # Automatic selection based on quality
         if sobel_measurement and not sobel_failed:
     # Calculate edge quality confidence (v1)
     edge_quality_conf = None
+    if edge_method_used in ["sobel", "mask", "compare"]:
         edge_quality_conf = compute_edge_quality_confidence(
             final_measurement.get("edge_quality")
         )
         card_conf,
         finger_conf,
         measurement_conf,
+        edge_method="sobel" if edge_method_used in ["sobel", "mask", "compare"] else "contour",
         edge_quality_confidence=edge_quality_conf,
     )
         print(f"Generating result visualization...")
         # Use comprehensive edge overlay (based on Sobel data) + card bounding box
+        if edge_method_used in ["sobel", "mask", "compare"] and sobel_measurement and not sobel_failed:
             edge_data = sobel_measurement["edge_data"]
             roi_bounds = sobel_measurement["roi_data"]["roi_bounds"]
             width_data = sobel_measurement["width_data"]
             # Fallback: plain image with axis/zone annotations when Sobel unavailable
             debug_image = image_canonical.copy()
+        # Tint SAM hand + card masks as underlays. Both masks live in the
+        # pre-precise-rotation canonical frame, so apply the same rotation
+        # matrix that was used to align the finger.
+        debug_image = _overlay_sam_masks(
+            debug_image,
+            hand_mask=hand_data.get("mask") if hand_data else None,
+            card_mask=card_result.get("mask") if card_result else None,
+            rotation_matrix=rotation_matrix,
+        )
+        # Draw the MediaPipe hand skeleton so reviewers can see the detected
+        # landmarks. hand_data landmarks are in the pre-precise-rotation
+        # canonical frame, so apply the same rotation_matrix here.
+        debug_image = _overlay_hand_skeleton(
+            debug_image,
+            landmarks=hand_data.get("landmarks") if hand_data else None,
+            rotation_matrix=rotation_matrix,
+        )
         # Draw card bounding box (transform corners if image was rotated)
         if card_result is not None and "corners" in card_result:
             corners = card_result["corners"]
                 cv2.polylines(debug_image, [pts], isClosed=True,
                               color=(0, 255, 0), thickness=3, lineType=cv2.LINE_AA)
+        # Save result image (downscaled + JPEG-encoded for speed)
+        _save_debug_visualization(result_png_path, debug_image)
         print(f"Result visualization saved to: {result_png_path}")
     view_angle_ok: bool,
     card_result: Optional[Dict[str, Any]],
     scale_confidence: float,
+    edge_method: str = "mask",
     sobel_threshold: float = 15.0,
     sobel_kernel_size: int = 3,
     use_subpixel: bool = True,
     )
     h_can, w_can = image_canonical.shape[:2]
+    raw_hand_mask = hand_data.get("mask")
     finger_data = isolate_finger(hand_data, finger=finger_name, image_shape=(h_can, w_can))
     if finger_data is None:
             cleaned_mask, rotation_matrix, (w_can, h_can),
             flags=cv2.INTER_NEAREST, borderMode=cv2.BORDER_CONSTANT, borderValue=0,
         )
+        if raw_hand_mask is not None:
+            raw_hand_mask = cv2.warpAffine(
+                raw_hand_mask, rotation_matrix, (w_can, h_can),
+                flags=cv2.INTER_NEAREST, borderMode=cv2.BORDER_CONSTANT, borderValue=0,
+            )
     # Ring zone
     try:
     # Sobel measurement
     sobel_measurement = None
     sobel_failed = False
+    if edge_method in ["sobel", "mask", "auto", "compare"]:
+        if edge_method == "sobel":
+            mask_mode = "sobel_only"
+        elif edge_method == "mask":
+            mask_mode = "mask_only"
+        else:
+            mask_mode = "hybrid"
         try:
+            if mask_mode == "mask_only" and raw_hand_mask is not None:
+                edge_mask_input = raw_hand_mask
+            else:
+                edge_mask_input = cleaned_mask
             sobel_measurement = refine_edges_sobel(
                 image=img_work, axis_data=axis_data, zone_data=zone_data,
                 scale_px_per_cm=px_per_cm, finger_landmarks=finger_data.get("landmarks"),
                 sobel_threshold=sobel_threshold, kernel_size=sobel_kernel_size,
                 use_subpixel=use_subpixel,
+                finger_mask=edge_mask_input,
+                mask_mode=mask_mode,
+                finger_name=finger_name,
             )
         except Exception:
             sobel_failed = True
+            if edge_method in ("sobel", "mask"):
                 return create_output(
                     card_detected=card_detected, finger_detected=True,
                     scale_px_per_cm=px_per_cm, view_angle_ok=view_angle_ok,
+                    fail_reason="sobel_edge_refinement_failed", edge_method_used=edge_method,
                 )
     # Select method
         median_width_cm = sobel_measurement["median_width_cm"]
         edge_method_used = "sobel"
         final_measurement = sobel_measurement
+    elif edge_method == "mask" and sobel_measurement:
+        median_width_cm = sobel_measurement["median_width_cm"]
+        edge_method_used = "mask"
+        final_measurement = sobel_measurement
     elif edge_method == "auto":
         if sobel_measurement and not sobel_failed:
             should_use, _ = should_use_sobel_measurement(sobel_measurement, contour_measurement)
     finger_conf = compute_finger_confidence(hand_data, finger_data, mask_area, image_area)
     measurement_conf = compute_measurement_confidence(final_measurement, median_width_cm)
     edge_quality_conf = None
+    if edge_method_used in ["sobel", "mask", "compare"]:
         edge_quality_conf = compute_edge_quality_confidence(final_measurement.get("edge_quality"))
     confidence_breakdown = compute_overall_confidence(
         card_conf, finger_conf, measurement_conf,
+        edge_method="sobel" if edge_method_used in ["sobel", "mask", "compare"] else "contour",
         edge_quality_confidence=edge_quality_conf,
     )
     confidence_threshold: float = 0.7,
     result_png_path: Optional[str] = None,
     save_debug: bool = False,
+    edge_method: str = "mask",
     sobel_threshold: float = 15.0,
     sobel_kernel_size: int = 3,
     use_subpixel: bool = True,
     skip_card_detection: bool = False,
     no_calibration: bool = False,
+    card_method: str = "classic",
+    hand_mask_method: str = "sam",
     ring_model: str = DEFAULT_RING_MODEL,
 ) -> Dict[str, Any]:
     """Measure index, middle, and ring fingers from a single image.
     """
     from src.finger_segmentation import FINGER_LANDMARKS
+    # Phase 1: Image quality metrics (informational only — no hard fail)
     quality = assess_image_quality(image)
     print(f"[multi] Image quality: blur={quality['blur_score']:.1f}, "
           f"brightness={quality['brightness']:.1f}, contrast={quality['contrast']:.1f}")
     if not quality["passed"]:
         for issue in quality["issues"]:
+            print(f"  Note: {issue}")
     # Lighting uniformity check
     lighting = check_lighting_uniformity(image)
     if save_debug and result_png_path is not None:
         finger_debug_dir = str(Path(result_png_path).parent / "finger_segmentation_debug")
+    hand_data = segment_hand(
+        image,
+        finger="index",
+        debug_dir=finger_debug_dir,
+        use_sam_mask=(hand_mask_method == "sam"),
+    )
     if hand_data is None:
         print("[multi] No hand detected")
         return {"fail_reason": "hand_not_detected", "per_finger": {}, "fingers_measured": 0, "fingers_succeeded": 0}
         view_angle_ok = True
         card_detected = False
     else:
+        if card_method == "sam":
+            card_result = _sam_card_detect(
+                image_canonical, hand_data, save_debug, result_png_path
+            )
+        else:
+            card_result = detect_credit_card(image_canonical, debug_dir=card_debug_dir)
         if card_result is None:
             return {"fail_reason": "card_not_detected", "per_finger": {}, "fingers_measured": 0, "fingers_succeeded": 0}
         px_per_cm, scale_confidence = compute_scale_factor(card_result["corners"])
             card_result=card_result,
             px_per_cm=px_per_cm,
             result_png_path=result_png_path,
+            hand_mask=hand_data.get("mask") if hand_data else None,
+            hand_landmarks=hand_data.get("landmarks") if hand_data else None,
         )
     # Clean internal data from output
     card_result: Optional[Dict[str, Any]],
     px_per_cm: float,
     result_png_path: str,
+    hand_mask: Optional[np.ndarray] = None,
+    hand_landmarks: Optional[np.ndarray] = None,
 ) -> None:
     """Generate debug visualization for multi-finger measurement.
     vis = image_canonical.copy()
     h, w = vis.shape[:2]
+    # SAM silhouettes (hand + card) as tinted underlays
+    vis = _overlay_sam_masks(
+        vis,
+        hand_mask=hand_mask,
+        card_mask=(card_result.get("mask") if card_result else None),
+    )
+    # MediaPipe hand skeleton (canonical frame — no rotation needed since the
+    # multi-finger viz composes per-finger overlays via inverse rotation).
+    vis = _overlay_hand_skeleton(vis, landmarks=hand_landmarks)
+    # Draw card bounding box / dimensions on top of the tinted card mask
     if card_result is not None:
         vis = draw_card_overlay(vis, card_result, px_per_cm)
                              Color.GREEN, 1, cv2.LINE_AA)
                 count += 1
+    _save_debug_visualization(result_png_path, vis)
     print(f"\n[multi] Debug visualization saved to: {result_png_path}")
             use_subpixel=not args.no_subpixel,
             skip_card_detection=args.skip_card_detection,
             no_calibration=args.no_calibration,
+            card_method=args.card_method,
+            hand_mask_method=args.hand_mask,
             ring_model=args.ring_model,
         )
         sobel_kernel_size=args.sobel_kernel_size,
         use_subpixel=not args.no_subpixel,
         skip_card_detection=args.skip_card_detection,
+        card_method=args.card_method,
+        hand_mask_method=args.hand_mask,
         ring_model=args.ring_model,
     )

requirements.txt CHANGED Viewed

@@ -7,3 +7,8 @@ flask>=3.0.0
 gunicorn>=21.2.0
 openai>=1.0.0
 supabase>=2.0.0

 gunicorn>=21.2.0
 openai>=1.0.0
 supabase>=2.0.0
+# SAM 2.1 via HuggingFace transformers (card detection and hand-mask segmentation)
+torch>=2.4.0
+torchvision>=0.19.0
+transformers>=4.47.0
+pillow>=10.0.0

script/compare_hand_sam.py ADDED Viewed

	@@ -0,0 +1,227 @@

+"""Compare hand-mask quality across backends on a single image.
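+Runs MediaPipe (current pipeline), SAM 2.1 tiny, and SAM 2.1 small. Each SAM
+model is prompted with a positive point at the palm center (derived from the
+MediaPipe landmarks) and a negative point to the left of the hand. Saves every
+panel at full resolution, a side-by-side comparison, and a zoomed crop around
+the hand for fine-detail inspection.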
+"""
+from __future__ import annotations
+import sys
+import time
+from pathlib import Path
+from typing import Tuple
+import cv2
+import numpy as np
+from PIL import Image as PILImage
+sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
+from src.finger_segmentation import segment_hand  # noqa: E402
+IMG_PATH = Path("input/sample-04-12/card_2.jpg")
+OUT_DIR = Path("output/hand_sam_compare")
+SAM_MODELS = [
+    ("sam2.1-tiny", "facebook/sam2.1-hiera-tiny"),
+    ("sam2.1-small", "facebook/sam2.1-hiera-small"),
+]
+def palm_and_card_points(image_bgr: np.ndarray, hand_data: dict) -> Tuple[Tuple[int, int], Tuple[int, int]]:
+    """Return (palm_center, card_center) pixel coords in the canonical image space.
+    Palm center = mean of wrist + MCPs (landmarks 0, 5, 9, 13, 17).
+    Card center = a rough point to the left of the hand (negative prompt hint).
+    """
+    landmarks = hand_data.get("landmarks")
+    if landmarks is None:
+        raise RuntimeError("MediaPipe returned no landmarks")
+    # landmarks is (21, 2 or 3) in pixel coords
+    lm = np.asarray(landmarks)[:, :2]
+    palm_ids = [0, 5, 9, 13, 17]
+    palm_center = tuple(np.round(lm[palm_ids].mean(axis=0)).astype(int).tolist())
+    # Card hint: far from hand, toward image left
+    h, w = image_bgr.shape[:2]
+    hand_x_min = int(lm[:, 0].min())
+    card_x = max(50, hand_x_min - 150)
+    card_y = h // 2
+    return palm_center, (card_x, card_y)
+def run_sam(
+    model_id: str,
+    image_rgb: np.ndarray,
+    palm_xy: Tuple[int, int],
+    negative_xy: Tuple[int, int],
+) -> Tuple[np.ndarray, float, float]:
+    """Run SAM 2.1 with palm positive + card negative point. Returns (mask, score, seconds)."""
+    import torch
+    from transformers import Sam2Model, Sam2Processor
+    processor = Sam2Processor.from_pretrained(model_id)
+    model = Sam2Model.from_pretrained(model_id).to("cpu").eval()
+    pil = PILImage.fromarray(image_rgb)
+    input_points = [[[list(palm_xy), list(negative_xy)]]]
+    input_labels = [[[1, 0]]]
+    t0 = time.time()
+    inputs = processor(
+        images=pil,
+        input_points=input_points,
+        input_labels=input_labels,
+        return_tensors="pt",
+    )
+    with torch.inference_mode():
+        outputs = model(**inputs, multimask_output=True)
+    masks = processor.post_process_masks(
+        outputs.pred_masks.cpu(),
+        inputs["original_sizes"],
+        mask_threshold=0.0,
+    )[0][0]  # (num_candidates, H, W) for first image, first prompt set
+    scores = outputs.iou_scores.cpu().numpy()[0, 0]
+    best_idx = int(np.argmax(scores))
+    mask = masks[best_idx].numpy().astype(bool)
+    return mask, float(scores[best_idx]), time.time() - t0
+def mask_to_overlay(image_bgr: np.ndarray, mask: np.ndarray, color: Tuple[int, int, int]) -> np.ndarray:
+    """Return a BGR image with the mask tinted + contour drawn."""
+    out = image_bgr.copy()
+    tint = np.zeros_like(out)
+    tint[mask] = color
+    out = cv2.addWeighted(out, 1.0, tint, 0.35, 0)
+    contours, _ = cv2.findContours(
+        mask.astype(np.uint8) * 255, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
+    )
+    cv2.drawContours(out, contours, -1, color, 2, cv2.LINE_AA)
+    return out
+def label_panel(img: np.ndarray, text: str) -> np.ndarray:
+    h, w = img.shape[:2]
+    cv2.rectangle(img, (0, 0), (w, 60), (0, 0, 0), -1)
+    cv2.putText(img, text, (20, 42), cv2.FONT_HERSHEY_SIMPLEX, 1.3,
+                (255, 255, 255), 3, cv2.LINE_AA)
+    return img
+def main() -> int:
+    OUT_DIR.mkdir(parents=True, exist_ok=True)
+    image_bgr = cv2.imread(str(IMG_PATH))
+    if image_bgr is None:
+        print(f"Failed to load {IMG_PATH}")
+        return 1
+    print(f"Image: {IMG_PATH} {image_bgr.shape}")
+    # --- MediaPipe baseline ---
+    t0 = time.time()
+    hand_data = segment_hand(image_bgr, finger="index")
+    mp_time = time.time() - t0
+    if hand_data is None:
+        print("MediaPipe detected no hand — aborting")
+        return 1
+    canonical_image = hand_data.get("canonical_image", image_bgr)
+    mp_mask = hand_data.get("mask")
+    if mp_mask is None:
+        print("MediaPipe did not return a hand mask")
+        return 1
+    mp_mask = mp_mask.astype(bool)
+    print(f"MediaPipe: {mp_time:.1f}s  mask_area={mp_mask.sum()}")
+    # Work in the canonical image so the comparison is apples-to-apples
+    image_for_sam = canonical_image.copy()
+    palm_xy, card_xy = palm_and_card_points(image_for_sam, hand_data)
+    print(f"Palm prompt: {palm_xy}  Negative hint: {card_xy}")
+    image_rgb = cv2.cvtColor(image_for_sam, cv2.COLOR_BGR2RGB)
+    # --- SAM models ---
+    results = {"mediapipe": (mp_mask, None, mp_time)}
+    for name, model_id in SAM_MODELS:
+        print(f"\n=== {name} ({model_id}) ===")
+        try:
+            mask, score, seconds = run_sam(model_id, image_rgb, palm_xy, card_xy)
+            # Align shape (should already be canonical)
+            if mask.shape != mp_mask.shape:
+                mask = cv2.resize(
+                    mask.astype(np.uint8),
+                    (mp_mask.shape[1], mp_mask.shape[0]),
+                    interpolation=cv2.INTER_NEAREST,
+                ).astype(bool)
+            print(f"  score={score:.3f}  time={seconds:.1f}s  area={mask.sum()}")
+            results[name] = (mask, score, seconds)
+        except Exception as e:
+            print(f"  FAILED: {e!r}")
+            import traceback
+            traceback.print_exc()
+    # --- Render panels ---
+    panels = []
+    colors = {
+        "mediapipe": (0, 165, 255),      # orange
+        "sam2.1-tiny": (0, 255, 255),    # yellow
+        "sam2.1-small": (0, 255, 0),     # green
+    }
+    # Panel 0: original with prompt points
+    orig = image_for_sam.copy()
+    cv2.circle(orig, palm_xy, 18, (0, 255, 0), -1)
+    cv2.circle(orig, palm_xy, 18, (0, 0, 0), 3)
+    cv2.circle(orig, card_xy, 18, (0, 0, 255), -1)
+    cv2.circle(orig, card_xy, 18, (0, 0, 0), 3)
+    panels.append(label_panel(orig, "original + prompts"))
+    for name in ["mediapipe", "sam2.1-tiny", "sam2.1-small"]:
+        if name not in results:
+            continue
+        mask, score, seconds = results[name]
+        panel = mask_to_overlay(image_for_sam, mask, colors[name])
+        label = f"{name}  {seconds:.1f}s"
+        if score is not None:
+            label += f"  score={score:.2f}"
+        panels.append(label_panel(panel, label))
+    # Save individual panels full-res
+    for i, p in enumerate(panels):
+        cv2.imwrite(str(OUT_DIR / f"panel_{i}_{['orig','mediapipe','tiny','small'][i]}.png"), p)
+    # Build a single side-by-side at a readable size
+    def resize_to_height(img: np.ndarray, H: int) -> np.ndarray:
+        h, w = img.shape[:2]
+        scale = H / h
+        return cv2.resize(img, (int(round(w * scale)), H), interpolation=cv2.INTER_AREA)
+    target_h = 900
+    resized = [resize_to_height(p, target_h) for p in panels]
+    combined = np.hstack(resized)
+    cv2.imwrite(str(OUT_DIR / "comparison_full.png"), combined)
+    # Also zoom-crop around the hand for fine-detail inspection
+    ys, xs = np.where(mp_mask)
+    if len(xs) > 0:
+        pad = 80
+        x0, x1 = max(0, xs.min() - pad), min(image_for_sam.shape[1], xs.max() + pad)
+        y0, y1 = max(0, ys.min() - pad), min(image_for_sam.shape[0], ys.max() + pad)
+        crops = []
+        for p in panels:
+            crop = p[y0:y1, x0:x1]
+            crops.append(resize_to_height(crop, target_h))
+        combined_zoom = np.hstack(crops)
+        cv2.imwrite(str(OUT_DIR / "comparison_zoom.png"), combined_zoom)
+    print(f"\nSaved panels to {OUT_DIR}/")
+    return 0
+if __name__ == "__main__":
+    raise SystemExit(main())

script/validate_sam_card.py ADDED Viewed

	@@ -0,0 +1,198 @@

+"""Validate SAM card detection (classic vs AMG vs prompt) on sample-04-12.
+Prompt-based SAM depends on MediaPipe running first to provide a hand mask
+for seed derivation, so we run `segment_hand()` on each image before timing
+the three detectors.
+Outputs per-image rows and a summary with success counts + mean wall time.
+Debug overlays saved under `output/sam_val/<stem>/`.
+"""
+from __future__ import annotations
+import os
+import sys
+import time
+import traceback
+from pathlib import Path
+SKIP_AMG = bool(os.environ.get("SKIP_AMG"))
+import cv2
+import numpy as np
+sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
+from src.card_detection import compute_scale_factor, detect_credit_card  # noqa: E402
+from src.finger_segmentation import segment_hand  # noqa: E402
+from src.sam_card_detection import (  # noqa: E402
+    detect_credit_card_sam,
+    detect_credit_card_sam_prompt,
+    suggest_card_seeds,
+)
+SAMPLE_DIR = Path("input/sample-04-12")
+OUT_DIR = Path("output/sam_val")
+def _negatives_from_landmarks(landmarks: np.ndarray):
+    palm_idx = [0, 5, 9, 13, 17]
+    c = np.mean(landmarks[palm_idx, :2], axis=0)
+    return [(int(round(c[0])), int(round(c[1])))]
+def run_one(img_path: Path) -> dict:
+    bgr = cv2.imread(str(img_path))
+    if bgr is None:
+        return {"file": img_path.name, "error": "load_failed"}
+    rec = {"file": img_path.name, "shape": bgr.shape[:2]}
+    # --- MediaPipe + SAM hand (needed for prompt-SAM seeds) ---
+    t0 = time.time()
+    try:
+        hand_data = segment_hand(bgr, finger="index", use_sam_mask=True)
+    except Exception as e:
+        hand_data = None
+        rec["hand_error"] = repr(e)[:120]
+    rec["hand_time_s"] = round(time.time() - t0, 2)
+    if hand_data is None:
+        rec["hand_detected"] = False
+        canonical = bgr
+    else:
+        rec["hand_detected"] = True
+        canonical = hand_data.get("canonical_image", bgr)
+    # --- Classic ---
+    t0 = time.time()
+    try:
+        classic = detect_credit_card(canonical)
+        if classic is not None:
+            px_cm, _ = compute_scale_factor(classic["corners"])
+            rec["classic_px_per_cm"] = px_cm
+        else:
+            rec["classic_px_per_cm"] = None
+    except Exception as e:
+        rec["classic_error"] = repr(e)[:120]
+    rec["classic_time_s"] = round(time.time() - t0, 2)
+    # --- SAM AMG ---
+    rec["amg_px_per_cm"] = None
+    rec["amg_time_s"] = None
+    if not SKIP_AMG:
+        amg_debug = OUT_DIR / img_path.stem / "sam_card_amg"
+        t0 = time.time()
+        try:
+            amg = detect_credit_card_sam(canonical, debug_dir=str(amg_debug))
+            if amg is not None:
+                px_cm, _ = compute_scale_factor(amg["corners"])
+                rec["amg_px_per_cm"] = px_cm
+        except Exception as e:
+            rec["amg_error"] = repr(e)[:120]
+            traceback.print_exc()
+        rec["amg_time_s"] = round(time.time() - t0, 2)
+    # --- SAM prompt ---
+    rec["prompt_px_per_cm"] = None
+    rec["prompt_time_s"] = None
+    if hand_data is not None:
+        prompt_debug = OUT_DIR / img_path.stem / "sam_card_prompt"
+        seeds = suggest_card_seeds(hand_data["mask"], canonical.shape[:2])
+        rec["prompt_n_seeds"] = len(seeds)
+        negs = _negatives_from_landmarks(hand_data["landmarks"])
+        t0 = time.time()
+        try:
+            pr = detect_credit_card_sam_prompt(
+                canonical,
+                seed_points=seeds,
+                negative_points=negs,
+                debug_dir=str(prompt_debug),
+            )
+            if pr is not None:
+                px_cm, _ = compute_scale_factor(pr["corners"])
+                rec["prompt_px_per_cm"] = px_cm
+        except Exception as e:
+            rec["prompt_error"] = repr(e)[:120]
+            traceback.print_exc()
+        rec["prompt_time_s"] = round(time.time() - t0, 2)
+    return rec
+def main() -> int:
+    OUT_DIR.mkdir(parents=True, exist_ok=True)
+    images = sorted(SAMPLE_DIR.glob("*.jpg"))
+    if not images:
+        print(f"No images found in {SAMPLE_DIR}")
+        return 1
+    print(f"Validating {len(images)} images from {SAMPLE_DIR}\n")
+    results = []
+    for img in images:
+        print(f"=== {img.name} ===")
+        rec = run_one(img)
+        results.append(rec)
+        print(rec)
+        print()
+    # --- Summary table ---
+    print("\n===== SUMMARY =====")
+    header = (
+        f"{'file':<18}"
+        f"{'classic':>10}{'classicT':>10}"
+        f"{'amg':>10}{'amgT':>8}"
+        f"{'prompt':>10}{'promptT':>10}"
+    )
+    print(header)
+    print("-" * len(header))
+    counts = {"classic": 0, "amg": 0, "prompt": 0}
+    times = {"classic": [], "amg": [], "prompt": []}
+    for r in results:
+        def _fmt(v, fmt="{:.2f}"):
+            return fmt.format(v) if v is not None else "FAIL"
+        c = r.get("classic_px_per_cm")
+        a = r.get("amg_px_per_cm")
+        p = r.get("prompt_px_per_cm")
+        ct = r.get("classic_time_s")
+        at = r.get("amg_time_s")
+        pt = r.get("prompt_time_s")
+        print(
+            f"{r['file']:<18}"
+            f"{_fmt(c):>10}{_fmt(ct):>10}"
+            f"{_fmt(a):>10}{_fmt(at):>8}"
+            f"{_fmt(p):>10}{_fmt(pt):>10}"
+        )
+        if c is not None:
+            counts["classic"] += 1
+            times["classic"].append(ct)
+        if a is not None:
+            counts["amg"] += 1
+            times["amg"].append(at)
+        if p is not None:
+            counts["prompt"] += 1
+            times["prompt"].append(pt)
+    n = len(results)
+    print("-" * len(header))
+    for k in ("classic", "amg", "prompt"):
+        ok = counts[k]
+        mean_t = (sum(times[k]) / len(times[k])) if times[k] else float("nan")
+        print(f"{k:<8}  success: {ok}/{n}   mean_time_s: {mean_t:.2f}")
+    # Agreement check: when both prompt and amg succeeded, how close are scales?
+    agree = []
+    for r in results:
+        a, p = r.get("amg_px_per_cm"), r.get("prompt_px_per_cm")
+        if a is not None and p is not None:
+            agree.append(100 * abs(a - p) / max(a, p))
+    if agree:
+        print(f"\nprompt vs amg scale agreement: mean diff {np.mean(agree):.2f}%, "
+              f"max {max(agree):.2f}% (n={len(agree)})")
+    return 0
+if __name__ == "__main__":
+    raise SystemExit(main())

src/edge_refinement.py CHANGED Viewed

@@ -93,6 +93,7 @@ def _find_edges_from_axis(
     row_mask: Optional[np.ndarray] = None,
     row_gradient_left_to_right: Optional[np.ndarray] = None,
     row_gradient_right_to_left: Optional[np.ndarray] = None,
 ) -> Optional[Tuple[float, float, float, float]]:
     """
     Find left and right edges by expanding from axis position.
@@ -131,72 +132,90 @@ def _find_edges_from_axis(
     left_search_gradient = row_gradient_right_to_left if row_gradient_right_to_left is not None else row_gradient
     right_search_gradient = row_gradient_left_to_right if row_gradient_left_to_right is not None else row_gradient
     # MASK-CONSTRAINED MODE (preferred when available)
     if row_mask is not None and np.any(row_mask):
-        # Strategy: Search FROM axis OUTWARD, constrained by mask
-        # This avoids picking background edges while using gradient precision
         mask_indices = np.where(row_mask)[0]
         if len(mask_indices) < 2:
             return None  # Mask too small
-        left_mask_boundary = mask_indices[0]
-        right_mask_boundary = mask_indices[-1]
-        # Search LEFT from axis, stopping at mask boundary
-        left_edge_x = None
-        left_strength = 0
-        # Start from axis, go left until we reach left mask boundary
-        search_start = max(left_mask_boundary, int(axis_x))
-        for x in range(search_start, left_mask_boundary - 1, -1):
-            if x < 0 or x >= len(row_gradient):
-                continue
-            if left_search_gradient[x] > threshold:
-                # Found a strong edge - update if stronger than previous
-                if left_search_gradient[x] > left_strength:
-                    left_edge_x = x
-                    left_strength = left_search_gradient[x]
-        # If no edge found with full threshold, try with relaxed threshold
-        if left_edge_x is None:
-            relaxed_threshold = threshold * 0.5
-            for x in range(search_start, left_mask_boundary - 1, -1):
-                if x < 0 or x >= len(row_gradient):
-                    continue
-                if left_search_gradient[x] > relaxed_threshold:
-                    if left_search_gradient[x] > left_strength:
-                        left_edge_x = x
-                        left_strength = left_search_gradient[x]
-        # Search RIGHT from axis, stopping at mask boundary
-        right_edge_x = None
-        right_strength = 0
-        # Start from axis, go right until we reach right mask boundary
-        search_start = min(right_mask_boundary, int(axis_x))
-        for x in range(search_start, right_mask_boundary + 1):
-            if x < 0 or x >= len(row_gradient):
-                continue
-            if right_search_gradient[x] > threshold:
-                # Found a strong edge - update if stronger than previous
-                if right_search_gradient[x] > right_strength:
-                    right_edge_x = x
-                    right_strength = right_search_gradient[x]
-        # If no edge found with full threshold, try with relaxed threshold
-        if right_edge_x is None:
-            relaxed_threshold = threshold * 0.5
-            for x in range(search_start, right_mask_boundary + 1):
-                if x < 0 or x >= len(row_gradient):
-                    continue
-                if right_search_gradient[x] > relaxed_threshold:
-                    if right_search_gradient[x] > right_strength:
-                        right_edge_x = x
-                        right_strength = right_search_gradient[x]
-        if left_edge_x is None or right_edge_x is None:
-            return None  # No valid edges found
     else:
         # AXIS-EXPANSION MODE (fallback when no mask)
@@ -240,7 +259,9 @@ def extract_ring_zone_roi(
     image: np.ndarray,
     axis_data: Dict[str, Any],
     zone_data: Dict[str, Any],
-    rotate_align: bool = False
 ) -> Dict[str, Any]:
     """
     Extract ROI around ring zone.
@@ -274,7 +295,14 @@ def extract_ring_zone_roi(
     zone_length = zone_data["length"]
     center = zone_data["center_point"]
     direction = axis_data["direction"]
-    half_height = zone_length * 0.25 # 0.5x / 2
     half_width = zone_length * 0.6  # 1.5x / 2
     x_min = int(np.clip(center[0] - half_width, 0, w - 1))
@@ -294,8 +322,22 @@ def extract_ring_zone_roi(
     # Convert to grayscale for edge detection
     roi_gray = cv2.cvtColor(roi_bgr, cv2.COLOR_BGR2GRAY)
-    # Full ROI mask — the ROI rectangle itself is the search constraint
-    roi_mask = np.ones((roi_height, roi_width), dtype=np.uint8) * 255
     # Create transform matrix (ROI coords -> original coords)
     # Simple translation for non-rotated case
@@ -473,7 +515,8 @@ def detect_edges_per_row(
     roi_data: Dict[str, Any],
     threshold: float = DEFAULT_GRADIENT_THRESHOLD,
     expected_width_px: Optional[float] = None,
-    scale_px_per_cm: Optional[float] = None
 ) -> Dict[str, Any]:
     """
     Detect left and right finger edges for each row (cross-section).
@@ -535,7 +578,12 @@ def detect_edges_per_row(
     # Get finger mask for constrained edge detection (if available)
     roi_mask = roi_data.get("roi_mask")
-    mode_used = "mask_constrained" if roi_mask is not None else "axis_expansion"
     if roi_mask is not None:
         logger.debug(f"Using MASK-CONSTRAINED edge detection (mask shape: {roi_mask.shape})")
@@ -568,7 +616,8 @@ def detect_edges_per_row(
             result = _find_edges_from_axis(row_gradient, row, axis_x, threshold,
                                           min_width_px, max_width_px, row_mask,
                                           row_gradient_left_to_right=row_gradient_l2r,
-                                          row_gradient_right_to_left=row_gradient_r2l)
             if result is None:
                 continue  # No valid edges found
@@ -958,56 +1007,73 @@ def should_use_sobel_measurement(
     """
     Decide whether to use Sobel measurement or fall back to contour.
-    Decision criteria:
     1. Edge quality score > min_quality_score (default 0.7)
     2. Edge consistency > min_consistency (default 0.5 = 50%)
-    3. If contour available: Sobel and contour agree within max_difference_pct
     Args:
         sobel_result: Output from refine_edges_sobel()
         contour_result: Optional output from compute_cross_section_width()
-        min_quality_score: Minimum acceptable quality score
-        min_consistency: Minimum edge detection success rate
         max_difference_pct: Maximum allowed difference from contour (%)
     Returns:
         Tuple of (should_use_sobel, reason)
     """
-    # Check if edge quality data available
     if "edge_quality" not in sobel_result:
         return False, "edge_quality_data_missing"
     edge_quality = sobel_result["edge_quality"]
-    # Check 1: Overall quality score
-    if edge_quality["overall_score"] < min_quality_score:
-        return False, f"quality_score_low_{edge_quality['overall_score']:.2f}"
-    # Check 2: Consistency (success rate)
-    if edge_quality["consistency_score"] < min_consistency:
-        return False, f"consistency_low_{edge_quality['consistency_score']:.2f}"
-    # Check 3: Measurement reasonableness
     sobel_width = sobel_result.get("median_width_cm")
     if sobel_width is None or sobel_width <= 0:
         return False, "invalid_measurement"
-    # Typical finger width range
     if sobel_width < MIN_REALISTIC_WIDTH_CM or sobel_width > MAX_REALISTIC_WIDTH_CM:
         return False, f"unrealistic_width_{sobel_width:.2f}cm"
-    # Check 4: Agreement with contour (if available)
     if contour_result is not None:
         contour_width = contour_result.get("median_width_px")
         sobel_width_px = sobel_result.get("median_width_px")
         if contour_width and sobel_width_px:
             diff_pct = abs(sobel_width_px - contour_width) / contour_width * 100
             if diff_pct > max_difference_pct:
                 return False, f"disagrees_with_contour_{diff_pct:.1f}pct"
-    # All checks passed
     return True, "quality_acceptable"
@@ -1023,6 +1089,9 @@ def refine_edges_sobel(
     use_subpixel: bool = True,
     expected_width_px: Optional[float] = None,
     debug_dir: Optional[str] = None,
 ) -> Dict[str, Any]:
     """
     Main entry point for Sobel-based edge refinement.
@@ -1079,10 +1148,15 @@ def refine_edges_sobel(
         # A.2: Ring zone + ROI bounds (need to extract bounds first)
         # We'll save this after ROI extraction
-    # Step 1: Extract ROI
     roi_data = extract_ring_zone_roi(
         image, axis_data, zone_data,
-        rotate_align=rotate_align
     )
     logger.debug(f"ROI size: {roi_data['roi_width']}x{roi_data['roi_height']}px")
@@ -1117,12 +1191,14 @@ def refine_edges_sobel(
         grad_mag = draw_gradient_visualization(gradient_data["gradient_magnitude"], cv2.COLORMAP_HOT)
         observer.save_stage("06_gradient_magnitude", grad_mag)
-    # Step 3: Detect edges per row
     edge_data = detect_edges_per_row(
         gradient_data, roi_data,
         threshold=sobel_threshold,
         expected_width_px=expected_width_px,
-        scale_px_per_cm=scale_px_per_cm
     )
     logger.debug(f"Valid rows: {edge_data['num_valid_rows']}/{len(edge_data['valid_rows'])} ({edge_data['num_valid_rows']/len(edge_data['valid_rows'])*100:.1f}%)")
@@ -1151,11 +1227,13 @@ def refine_edges_sobel(
         # B.5: Selected edges (final detected edges)
         observer.draw_and_save("09_selected_edges", roi_data["roi_image"], draw_selected_edges, edge_data)
-    # Step 4: Measure width from edges (with sub-pixel refinement)
     width_data = measure_width_from_edges(
         edge_data, roi_data, scale_px_per_cm,
         gradient_data=gradient_data,
-        use_subpixel=use_subpixel
     )
     if debug_dir:

     row_mask: Optional[np.ndarray] = None,
     row_gradient_left_to_right: Optional[np.ndarray] = None,
     row_gradient_right_to_left: Optional[np.ndarray] = None,
+    mask_only: bool = False,
 ) -> Optional[Tuple[float, float, float, float]]:
     """
     Find left and right edges by expanding from axis position.
     left_search_gradient = row_gradient_right_to_left if row_gradient_right_to_left is not None else row_gradient
     right_search_gradient = row_gradient_left_to_right if row_gradient_left_to_right is not None else row_gradient
+    # In mask_only mode a row with no mask pixels (or a mask that doesn't
+    # contain the finger axis) must be dropped. Otherwise we would fall
+    # through to the gradient axis-expansion path below, which routinely
+    # returns ROI-edge coordinates as "edges" on empty rows and poisons the
+    # width median.
+    if mask_only and (row_mask is None or not np.any(row_mask)):
+        return None
     # MASK-CONSTRAINED MODE (preferred when available)
     if row_mask is not None and np.any(row_mask):
+        # Strategy: the SAM mask already knows where the finger boundary is
+        # to pixel accuracy. We anchor to the mask boundary by default, and
+        # only snap to a nearby gradient peak when one exceeds the threshold
+        # (for sub-pixel refinement). When contrast is weak (pale finger on
+        # light background) the gradient search yields nothing, so trusting
+        # the mask directly is what prevents "no valid widths" failures.
         mask_indices = np.where(row_mask)[0]
         if len(mask_indices) < 2:
             return None  # Mask too small
+        # Pick the contiguous run of mask pixels that contains the finger
+        # axis. This matters when the raw SAM hand mask is passed in (mask_only
+        # path): at the ring-zone rows the mask may include adjacent fingers,
+        # and np.where(...)[0][0]/[-1] would then span across fingers.
+        axis_col = int(round(axis_x))
+        axis_col = max(0, min(len(row_mask) - 1, axis_col))
+        if not row_mask[axis_col]:
+            # Axis is off the mask on this row — the ROI is clipping into
+            # background (e.g. ring/pinky ROI reaching the MCP webbing).
+            # Treat the row as invalid rather than snapping to whatever mask
+            # run happens to be nearest; otherwise the wrong run can pull
+            # the median width up.
+            return None
+        # Expand left/right from the axis until we leave the mask run.
+        left_mask_boundary = axis_col
+        while left_mask_boundary > 0 and row_mask[left_mask_boundary - 1]:
+            left_mask_boundary -= 1
+        right_mask_boundary = axis_col
+        max_col = len(row_mask) - 1
+        while right_mask_boundary < max_col and row_mask[right_mask_boundary + 1]:
+            right_mask_boundary += 1
+        # If the contiguous run reaches either ROI edge the mask has bled
+        # out of the ROI laterally — almost always via the webbing into the
+        # adjacent finger. The column we stopped at is the ROI edge, not
+        # the true finger boundary, so the row is unreliable.
+        if mask_only and (left_mask_boundary == 0 or right_mask_boundary == max_col):
+            return None
+        SEARCH_RANGE = 10  # px on either side of mask boundary to refine
+        def _snap_to_peak(
+            boundary: int,
+            gradient: np.ndarray,
+        ) -> Tuple[float, float]:
+            """Return (edge_x, strength). Snap to local gradient peak if
+            it exceeds threshold, otherwise fall back to boundary itself."""
+            lo = max(0, boundary - SEARCH_RANGE)
+            hi = min(len(gradient) - 1, boundary + SEARCH_RANGE)
+            if hi < lo:
+                return float(boundary), 0.0
+            window = gradient[lo:hi + 1]
+            best_rel = int(np.argmax(window))
+            best_val = float(window[best_rel])
+            if best_val > threshold:
+                return float(lo + best_rel), best_val
+            # Weak gradient: trust the SAM mask boundary directly.
+            return float(boundary), float(gradient[boundary])
+        if mask_only:
+            # Trust the SAM mask boundary exactly; no gradient snapping.
+            left_edge_x = float(left_mask_boundary)
+            right_edge_x = float(right_mask_boundary)
+            left_strength = float(left_search_gradient[left_mask_boundary])
+            right_strength = float(right_search_gradient[right_mask_boundary])
+        else:
+            left_edge_x, left_strength = _snap_to_peak(
+                left_mask_boundary, left_search_gradient
+            )
+            right_edge_x, right_strength = _snap_to_peak(
+                right_mask_boundary, right_search_gradient
+            )
     else:
         # AXIS-EXPANSION MODE (fallback when no mask)
     image: np.ndarray,
     axis_data: Dict[str, Any],
     zone_data: Dict[str, Any],
+    rotate_align: bool = False,
+    finger_mask: Optional[np.ndarray] = None,
+    finger_name: Optional[str] = None,
 ) -> Dict[str, Any]:
     """
     Extract ROI around ring zone.
     zone_length = zone_data["length"]
     center = zone_data["center_point"]
     direction = axis_data["direction"]
+    # Ring and pinky have their proximal phalanx set lower on the palm than
+    # index/middle, so an ROI sized for the latter reaches down into the
+    # MCP webbing — those rows bleed into the adjacent finger's mask. Use
+    # half the vertical span (0.25x zone length) for those fingers.
+    if finger_name in ("ring", "pinky"):
+        # Half of the default vertical span, so the ROI stops short of the
+        # MCP webbing on these fingers.
+        half_height = zone_length * 0.125  # 0.25x / 2
+    else:
+        half_height = zone_length * 0.25   # 0.5x / 2
     half_width = zone_length * 0.6  # 1.5x / 2
     x_min = int(np.clip(center[0] - half_width, 0, w - 1))
     # Convert to grayscale for edge detection
     roi_gray = cv2.cvtColor(roi_bgr, cv2.COLOR_BGR2GRAY)
+    # Build the per-row search constraint. Prefer a pixel-accurate finger mask
+    # when one is supplied (e.g. SAM 2.1 output). Falling back to a full-rect
+    # all-ones mask preserves legacy behaviour.
+    if finger_mask is not None:
+        fm = finger_mask
+        if fm.dtype != np.uint8:
+            fm = (fm > 0).astype(np.uint8) * 255
+        if fm.shape[:2] != image.shape[:2]:
+            fm = cv2.resize(
+                fm, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_NEAREST
+            )
+        roi_mask = fm[y_min:y_max, x_min:x_max].copy()
+        if not np.any(roi_mask):
+            roi_mask = np.ones((roi_height, roi_width), dtype=np.uint8) * 255
+    else:
+        roi_mask = np.ones((roi_height, roi_width), dtype=np.uint8) * 255
     # Create transform matrix (ROI coords -> original coords)
     # Simple translation for non-rotated case
     roi_data: Dict[str, Any],
     threshold: float = DEFAULT_GRADIENT_THRESHOLD,
     expected_width_px: Optional[float] = None,
+    scale_px_per_cm: Optional[float] = None,
+    mask_only: bool = False,
 ) -> Dict[str, Any]:
     """
     Detect left and right finger edges for each row (cross-section).
     # Get finger mask for constrained edge detection (if available)
     roi_mask = roi_data.get("roi_mask")
+    if mask_only and roi_mask is not None:
+        mode_used = "mask_only"
+    elif roi_mask is not None:
+        mode_used = "mask_constrained"
+    else:
+        mode_used = "axis_expansion"
     if roi_mask is not None:
         logger.debug(f"Using MASK-CONSTRAINED edge detection (mask shape: {roi_mask.shape})")
             result = _find_edges_from_axis(row_gradient, row, axis_x, threshold,
                                           min_width_px, max_width_px, row_mask,
                                           row_gradient_left_to_right=row_gradient_l2r,
+                                          row_gradient_right_to_left=row_gradient_r2l,
+                                          mask_only=mask_only)
             if result is None:
                 continue  # No valid edges found
     """
     Decide whether to use Sobel measurement or fall back to contour.
+    When the edge detection ran in ``mask_constrained`` or ``mask_only`` mode
+    (a pixel-accurate finger mask was supplied, e.g. from SAM 2.1), the SAM
+    mask boundary IS the ground truth — a low gradient score just reflects weak
+    finger/background contrast, not a bad measurement. In that case we skip the
+    quality/gradient gates and the contour-agreement check, and only enforce
+    the safety checks (plausible width and enough valid samples).
+    Decision criteria (non-masked path):
     1. Edge quality score > min_quality_score (default 0.7)
     2. Edge consistency > min_consistency (default 0.5 = 50%)
+    3. Realistic width range
+    4. If contour available: agreement within max_difference_pct
+    Decision criteria (mask-anchored path: mask_constrained or mask_only):
+    1. Realistic width range
+    2. At least 20 valid samples (MIN_MASK_SAMPLES)
     Args:
         sobel_result: Output from refine_edges_sobel()
         contour_result: Optional output from compute_cross_section_width()
+        min_quality_score: Minimum acceptable quality score (ignored for masked)
+        min_consistency: Minimum edge detection success rate (ignored for masked)
         max_difference_pct: Maximum allowed difference from contour (%)
     Returns:
         Tuple of (should_use_sobel, reason)
     """
     if "edge_quality" not in sobel_result:
         return False, "edge_quality_data_missing"
     edge_quality = sobel_result["edge_quality"]
+    mode_used = sobel_result.get("edge_data", {}).get("mode_used", "axis_expansion")
+    mask_anchored = mode_used in ("mask_constrained", "mask_only")
     sobel_width = sobel_result.get("median_width_cm")
     if sobel_width is None or sobel_width <= 0:
         return False, "invalid_measurement"
     if sobel_width < MIN_REALISTIC_WIDTH_CM or sobel_width > MAX_REALISTIC_WIDTH_CM:
         return False, f"unrealistic_width_{sobel_width:.2f}cm"
+    if mask_anchored:
+        # SAM mask is the source of truth, but we still require enough
+        # valid rows to form a robust median. A low sample count with an
+        # otherwise reasonable width usually indicates the per-finger mask
+        # bled into an adjacent finger and width validation killed most
+        # rows — contour is safer in that situation.
+        MIN_MASK_SAMPLES = 20  # parity with the contour path's 20 samples
+        num_samples = int(sobel_result.get("num_samples", 0))
+        if num_samples < MIN_MASK_SAMPLES:
+            return False, f"mask_samples_low_{num_samples}"
+        return True, "mask_anchored"
+    # Non-masked path: preserve the original gradient-quality gates.
+    if edge_quality["overall_score"] < min_quality_score:
+        return False, f"quality_score_low_{edge_quality['overall_score']:.2f}"
+    if edge_quality["consistency_score"] < min_consistency:
+        return False, f"consistency_low_{edge_quality['consistency_score']:.2f}"
     if contour_result is not None:
         contour_width = contour_result.get("median_width_px")
         sobel_width_px = sobel_result.get("median_width_px")
         if contour_width and sobel_width_px:
             diff_pct = abs(sobel_width_px - contour_width) / contour_width * 100
             if diff_pct > max_difference_pct:
                 return False, f"disagrees_with_contour_{diff_pct:.1f}pct"
     return True, "quality_acceptable"
     use_subpixel: bool = True,
     expected_width_px: Optional[float] = None,
     debug_dir: Optional[str] = None,
+    finger_mask: Optional[np.ndarray] = None,
+    mask_mode: str = "hybrid",
+    finger_name: Optional[str] = None,
 ) -> Dict[str, Any]:
     """
     Main entry point for Sobel-based edge refinement.
         # A.2: Ring zone + ROI bounds (need to extract bounds first)
         # We'll save this after ROI extraction
+    # Step 1: Extract ROI. Pure-Sobel mode drops the SAM mask so the gradient
+    # search expands from the finger axis without any mask anchoring; the
+    # "mask_only" and legacy "hybrid" modes both pass the mask through.
+    roi_finger_mask = None if mask_mode == "sobel_only" else finger_mask
     roi_data = extract_ring_zone_roi(
         image, axis_data, zone_data,
+        rotate_align=rotate_align,
+        finger_mask=roi_finger_mask,
+        finger_name=finger_name,
     )
     logger.debug(f"ROI size: {roi_data['roi_width']}x{roi_data['roi_height']}px")
         grad_mag = draw_gradient_visualization(gradient_data["gradient_magnitude"], cv2.COLORMAP_HOT)
         observer.save_stage("06_gradient_magnitude", grad_mag)
+    # Step 3: Detect edges per row. In "mask_only" mode the mask boundary is
+    # used verbatim so gradient snapping is disabled.
     edge_data = detect_edges_per_row(
         gradient_data, roi_data,
         threshold=sobel_threshold,
         expected_width_px=expected_width_px,
+        scale_px_per_cm=scale_px_per_cm,
+        mask_only=(mask_mode == "mask_only"),
     )
     logger.debug(f"Valid rows: {edge_data['num_valid_rows']}/{len(edge_data['valid_rows'])} ({edge_data['num_valid_rows']/len(edge_data['valid_rows'])*100:.1f}%)")
         # B.5: Selected edges (final detected edges)
         observer.draw_and_save("09_selected_edges", roi_data["roi_image"], draw_selected_edges, edge_data)
+    # Step 4: Measure width from edges (with sub-pixel refinement).
+    # Sub-pixel refinement is gradient-based, so it is skipped in mask_only.
+    effective_subpixel = use_subpixel and mask_mode != "mask_only"
     width_data = measure_width_from_edges(
         edge_data, roi_data, scale_px_per_cm,
         gradient_data=gradient_data,
+        use_subpixel=effective_subpixel,
     )
     if debug_dir:

src/finger_segmentation.py CHANGED Viewed

@@ -278,6 +278,7 @@ def segment_hand(
     finger: FingerIndex = "index",
     max_dimension: int = 1280,
     debug_dir: Optional[str] = None,
 ) -> Optional[Dict[str, Any]]:
     """
     Detect and segment hand from image using MediaPipe.
@@ -292,10 +293,17 @@ def segment_hand(
         Dictionary containing:
         - landmarks: 21x2 array of landmark positions (pixel coordinates)
         - landmarks_normalized: 21x2 array of normalized coordinates [0-1]
-        - mask: Binary hand mask
         - confidence: Detection confidence
         - handedness: "Left" or "Right"
         Or None if no hand detected
     """
     # Create debug observer if debug mode enabled
     observer = DebugObserver(debug_dir) if debug_dir else None
@@ -427,13 +435,34 @@ def segment_hand(
                              handedness[0].category_name,
                              f"det={rotation_code}, orient={orientation_rotation}")
-    # Generate hand mask at canonical resolution
-    mask = _create_hand_mask(landmarks_canonical, (can_full_h, can_full_w))
     return {
         "landmarks": landmarks_canonical,
         "landmarks_normalized": landmarks_normalized_canonical,
         "mask": mask,
         "confidence": handedness[0].score,
         "handedness": handedness[0].category_name,
         "rotation_applied": rotation_code,

     finger: FingerIndex = "index",
     max_dimension: int = 1280,
     debug_dir: Optional[str] = None,
+    use_sam_mask: bool = True,
 ) -> Optional[Dict[str, Any]]:
     """
     Detect and segment hand from image using MediaPipe.
         Dictionary containing:
         - landmarks: 21x2 array of landmark positions (pixel coordinates)
         - landmarks_normalized: 21x2 array of normalized coordinates [0-1]
+        - mask: Binary hand mask (pixel-accurate SAM 2.1 mask if use_sam_mask=True,
+          else synthetic convex-hull fallback from landmarks)
+        - mask_synthetic: Synthetic fallback mask (always populated for debug)
         - confidence: Detection confidence
         - handedness: "Left" or "Right"
         Or None if no hand detected
+    Args:
+        use_sam_mask: If True (default), call SAM 2.1 seeded by the palm-center
+            landmark to produce a pixel-accurate hand mask. If False, use the
+            legacy synthetic convex-hull mask built from landmarks only.
     """
     # Create debug observer if debug mode enabled
     observer = DebugObserver(debug_dir) if debug_dir else None
                              handedness[0].category_name,
                              f"det={rotation_code}, orient={orientation_rotation}")
+    # Legacy synthetic mask (convex hull of landmarks) — kept for fallback and debug
+    synthetic_mask = _create_hand_mask(landmarks_canonical, (can_full_h, can_full_w))
+    # SAM 2.1 pixel-accurate hand mask (default), seeded by palm center landmark
+    mask = synthetic_mask
+    mask_source = "synthetic"
+    if use_sam_mask:
+        try:
+            from .sam_hand_segmentation import segment_hand_sam, palm_center_from_landmarks
+            palm_xy = palm_center_from_landmarks(landmarks_canonical)
+            sam_debug_dir = str(Path(debug_dir) / "sam_hand") if debug_dir else None
+            sam_mask_bool = segment_hand_sam(
+                canonical_full, palm_xy, debug_dir=sam_debug_dir
+            )
+            if sam_mask_bool is not None:
+                mask = (sam_mask_bool.astype(np.uint8) * 255)
+                mask_source = "sam"
+            else:
+                print("  SAM hand mask returned None, falling back to synthetic")
+        except Exception as e:
+            print(f"  SAM hand mask failed ({e!r}), falling back to synthetic")
     return {
         "landmarks": landmarks_canonical,
         "landmarks_normalized": landmarks_normalized_canonical,
         "mask": mask,
+        "mask_synthetic": synthetic_mask,
+        "mask_source": mask_source,
         "confidence": handedness[0].score,
         "handedness": handedness[0].category_name,
         "rotation_applied": rotation_code,

src/geometry.py CHANGED Viewed

@@ -92,22 +92,28 @@ def estimate_finger_axis_from_landmarks(
     """
     Calculate finger axis directly from anatomical landmarks.
-    OPTIMIZED: Focuses on DIP-PIP segment (ring-wearing zone) for better accuracy.
     Args:
         landmarks: 4x2 array of finger landmarks [MCP, PIP, DIP, TIP]
         method: Calculation method
             - "endpoints": MCP to TIP vector (legacy, less accurate)
-            - "linear_fit": DIP to PIP vector (DEFAULT, optimized for ring measurements)
             - "median_direction": Median of 3 segment directions (robust to outliers)
     Returns:
         Dictionary containing:
-        - center: Axis center point at midpoint of PIP-DIP (x, y)
-        - direction: Unit direction vector (dx, dy) from PIP to DIP
         - length: Full finger length in pixels (TIP to MCP, for reference)
-        - palm_end: Visualization endpoint (extended from PIP toward palm)
-        - tip_end: Visualization endpoint (extended from DIP toward tip)
         - method: Method used ("landmarks")
     """
     # Validate landmarks
@@ -122,7 +128,7 @@ def estimate_finger_axis_from_landmarks(
     tip = landmarks[3]  # Fingertip
     # Calculate direction based on method
-    # OPTIMIZED: Focus on DIP-PIP segment (ring-wearing zone)
     if method == "endpoints":
         # Simple: vector from MCP to TIP (legacy, less accurate for ring zone)
         direction = tip - mcp
@@ -130,14 +136,16 @@ def estimate_finger_axis_from_landmarks(
         direction = direction / direction_length
     elif method == "linear_fit":
-        # OPTIMIZED: Use only DIP and PIP (most relevant for ring measurements)
-        # These two joints define the proximal phalanx where rings are worn
-        direction = dip - pip  # Vector from PIP to DIP
         direction_length = np.linalg.norm(direction)
         direction = direction / direction_length
-        # Ensure direction points from palm to tip (PIP to DIP)
-        # Direction should already be correct, but verify
         if np.dot(direction, tip - mcp) < 0:
             direction = -direction
@@ -156,18 +164,18 @@ def estimate_finger_axis_from_landmarks(
     else:
         raise ValueError(f"Unknown method: {method}. Use 'endpoints', 'linear_fit', or 'median_direction'")
-    # OPTIMIZED: Center at midpoint of DIP and PIP (ring zone focus)
-    center = (pip + dip) / 2.0
     # Calculate finger length (still use full finger for reference)
     length = np.linalg.norm(tip - mcp)
-    # OPTIMIZED: Visual endpoints are DIP and PIP (ring zone segment)
-    # Extended slightly for visualization clarity
-    segment_length = np.linalg.norm(dip - pip)
     extension_factor = 0.5  # Extend 50% beyond each endpoint for visualization
-    palm_end = pip - direction * (segment_length * extension_factor)
-    tip_end = dip + direction * (segment_length * extension_factor)
     return {
         "center": center.astype(np.float32),

     """
     Calculate finger axis directly from anatomical landmarks.
+    OPTIMIZED: Focuses on the PIP-MCP segment (proximal phalanx, where the
+    ring actually sits) for better accuracy. For straight fingers (index,
+    middle) this agrees with the DIP-PIP direction to within ~1°, but ring
+    and pinky often hold a visible PIP-joint curl, so the proximal phalanx
+    is at a different angle from the middle phalanx. Rotating by the
+    proximal-phalanx direction makes the ring zone exactly vertical and
+    cross-sections perpendicular to the bone we measure.
     Args:
         landmarks: 4x2 array of finger landmarks [MCP, PIP, DIP, TIP]
         method: Calculation method
             - "endpoints": MCP to TIP vector (legacy, less accurate)
+            - "linear_fit": MCP to PIP vector (DEFAULT, proximal phalanx)
             - "median_direction": Median of 3 segment directions (robust to outliers)
     Returns:
         Dictionary containing:
+        - center: Axis center point at midpoint of MCP-PIP (x, y)
+        - direction: Unit direction vector (dx, dy) pointing palm→tip
         - length: Full finger length in pixels (TIP to MCP, for reference)
+        - palm_end: Visualization endpoint (extended from MCP toward palm)
+        - tip_end: Visualization endpoint (extended from PIP toward tip)
         - method: Method used ("landmarks")
     """
     # Validate landmarks
     tip = landmarks[3]  # Fingertip
     # Calculate direction based on method
+    # OPTIMIZED: Focus on the PIP-MCP segment (proximal phalanx = ring zone)
     if method == "endpoints":
         # Simple: vector from MCP to TIP (legacy, less accurate for ring zone)
         direction = tip - mcp
         direction = direction / direction_length
     elif method == "linear_fit":
+        # OPTIMIZED: Use MCP→PIP, the proximal phalanx bone that a ring
+        # actually rests on. For ring and pinky this differs from the old
+        # DIP-PIP direction by the PIP-joint curl angle, which was
+        # silently tilting the measurement frame.
+        direction = pip - mcp  # Vector from MCP to PIP (palm→tip)
         direction_length = np.linalg.norm(direction)
         direction = direction / direction_length
+        # Sanity check: direction should point palm→tip. (MCP→PIP already
+        # does, but verify in case landmarks are swapped.)
         if np.dot(direction, tip - mcp) < 0:
             direction = -direction
     else:
         raise ValueError(f"Unknown method: {method}. Use 'endpoints', 'linear_fit', or 'median_direction'")
+    # OPTIMIZED: Center on the proximal phalanx midpoint (the ring zone).
+    center = (mcp + pip) / 2.0
     # Calculate finger length (still use full finger for reference)
     length = np.linalg.norm(tip - mcp)
+    # OPTIMIZED: Visual endpoints span the proximal phalanx (MCP→PIP)
+    # extended slightly for visualization clarity.
+    segment_length = np.linalg.norm(pip - mcp)
     extension_factor = 0.5  # Extend 50% beyond each endpoint for visualization
+    palm_end = mcp - direction * (segment_length * extension_factor)
+    tip_end = pip + direction * (segment_length * extension_factor)
     return {
         "center": center.astype(np.float32),

src/sam_backend.py ADDED Viewed

	@@ -0,0 +1,50 @@

+"""Shared lazy singleton for SAM 2.1 Tiny (model + processor).
+Both card detection (prompt-based) and hand segmentation use the same
+HuggingFace weights, so loading them once per process halves cold-start
+cost and keeps only one copy of the encoder in memory.
+"""
+from __future__ import annotations
+import os
+import time
+from typing import Tuple
+# Bump the default HF Hub HEAD/download timeout (10s) before transformers
+# reads the env var. On flaky networks the 10s HEAD check fires a retry storm
+# even when the weights are already cached locally.
+os.environ.setdefault("HF_HUB_DOWNLOAD_TIMEOUT", "60")
+SAM2_MODEL_ID = "facebook/sam2.1-hiera-large"
+# SAM resizes internally to 1024 — feeding >1024 wastes CPU on image encoding.
+INFERENCE_MAX_SIDE = 1024
+_model = None
+_processor = None
+def get_sam2() -> Tuple[object, object]:
+    """Return the process-wide ``(model, processor)`` pair for SAM 2.1.
+    The pair is created on the first call and reused afterwards. Loading is
+    attempted from the local HF cache first (``local_files_only=True``) so a
+    slow or unreachable huggingface.co does not trigger a HEAD-request retry
+    storm when the weights are already on disk. An ``OSError`` or
+    ``ValueError`` from that attempt is treated as a cache miss and a normal
+    online load is performed instead.
+    """
+    global _model, _processor
+    if _model is not None and _processor is not None:
+        return _model, _processor
+    # Deferred import: transformers is only loaded on the first call.
+    from transformers import Sam2Model, Sam2Processor
+    started = time.time()
+    print(f"  Loading SAM 2.1 ({SAM2_MODEL_ID})...")
+    try:
+        _processor = Sam2Processor.from_pretrained(SAM2_MODEL_ID, local_files_only=True)
+        cached_model = Sam2Model.from_pretrained(SAM2_MODEL_ID, local_files_only=True)
+        _model = cached_model.to("cpu").eval()
+        print(f"  SAM 2.1 loaded (offline cache) in {time.time() - started:.1f}s")
+    except (OSError, ValueError):
+        # Cache miss — fall back to online download.
+        _processor = Sam2Processor.from_pretrained(SAM2_MODEL_ID)
+        downloaded_model = Sam2Model.from_pretrained(SAM2_MODEL_ID)
+        _model = downloaded_model.to("cpu").eval()
+        print(f"  SAM 2.1 loaded (online) in {time.time() - started:.1f}s")
+    return _model, _processor

src/sam_card_detection.py ADDED Viewed

	@@ -0,0 +1,614 @@

+"""
+SAM 2.1-based credit card detection.
+Uses Meta's Segment Anything 2.1 (Hiera Tiny) via HuggingFace transformers
+to produce a pixel-accurate card mask, then filters candidate masks by area,
+rectangularity, and aspect ratio (~1.586) to pick the credit card.
+Drop-in replacement for `card_detection.detect_credit_card`: returns a dict
+with the same keys so the downstream pipeline is unchanged.
+"""
+from __future__ import annotations
+import os
+import time
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+import cv2
+import numpy as np
+from .card_detection import (
+    CARD_ASPECT_RATIO,
+    MAX_CARD_AREA_RATIO,
+    MIN_CARD_AREA_RATIO,
+    get_quad_dimensions,
+    order_corners,
+)
+from .sam_backend import INFERENCE_MAX_SIDE as PROMPT_INFERENCE_MAX_SIDE, get_sam2
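+# HF Hub model id. NOTE(review): the id below is the Hiera *Large* checkpoint,
+# while this module's docs describe the small, fast Tiny variant that fits
+# CPU / HF free Spaces — confirm which one is intended.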
+SAM2_MODEL_ID = "facebook/sam2.1-hiera-large"
+# Downscale large images before inference to keep CPU runtime tractable.
+# SAM 2.1 internally resizes to 1024, so feeding >1024 is pure overhead.
+INFERENCE_MAX_SIDE = 1024
+# Automatic mask generation grid density. 16 gives ~256 prompts — enough to
+# hit a credit card reliably without blowing up CPU time.
+POINTS_PER_SIDE = 16
+POINTS_PER_BATCH = 64
+# Candidate filtering
+MIN_RECTANGULARITY = 0.90  # mask_area / minAreaRect_area; card mask is near-perfect rectangle
+ASPECT_RATIO_TOLERANCE = 0.15  # fractional deviation from 1.586
+_pipeline = None  # lazy singleton
+def _get_pipeline():
+    """Return the shared SAM 2.1 mask-generation pipeline, building it once.
+    The pipeline is stored in the module-level ``_pipeline`` and reused on
+    later calls. The first build tries the local HF cache
+    (``local_files_only=True``) to sidestep huggingface.co HEAD-request
+    timeouts when the weights are already on disk; an ``OSError`` or
+    ``ValueError`` from that attempt falls back to a normal online load.
+    """
+    global _pipeline
+    if _pipeline is not None:
+        return _pipeline
+    # Set before transformers is imported; setdefault keeps any value the
+    # environment already provides.
+    os.environ.setdefault("HF_HUB_DOWNLOAD_TIMEOUT", "60")
+    from transformers import pipeline as hf_pipeline
+    started = time.time()
+    print(f"  Loading SAM 2.1 ({SAM2_MODEL_ID})...")
+    try:
+        _pipeline = hf_pipeline(
+            "mask-generation",
+            model=SAM2_MODEL_ID,
+            device="cpu",
+            model_kwargs={"local_files_only": True},
+        )
+        print(f"  SAM 2.1 loaded (offline cache) in {time.time() - started:.1f}s")
+    except (OSError, ValueError):
+        _pipeline = hf_pipeline("mask-generation", model=SAM2_MODEL_ID, device="cpu")
+        print(f"  SAM 2.1 loaded (online) in {time.time() - started:.1f}s")
+    return _pipeline
+def _downscale_for_inference(image_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
+    """Downscale image so the long side is at most INFERENCE_MAX_SIDE.
+    Images already within the limit are returned unchanged with a factor of
+    1.0. Larger images are resized with area interpolation, preserving the
+    aspect ratio up to integer rounding.
+    Returns:
+        Tuple of (scaled, scale_factor). scale_factor is original/scaled, so
+        coords in the scaled space * scale_factor -> coords in the original
+        space.
+    """
+    h, w = image_bgr.shape[:2]
+    long_side = max(h, w)
+    if long_side <= INFERENCE_MAX_SIDE:
+        return image_bgr, 1.0
+    scale = INFERENCE_MAX_SIDE / long_side
+    # Clamp to 1 px: on extremely elongated inputs the short side can round
+    # down to 0, and cv2.resize rejects an empty target size.
+    new_w = max(1, int(round(w * scale)))
+    new_h = max(1, int(round(h * scale)))
+    scaled = cv2.resize(image_bgr, (new_w, new_h), interpolation=cv2.INTER_AREA)
+    return scaled, 1.0 / scale  # factor to go scaled -> original
+def _mask_to_bool_array(mask: Any, target_shape: Optional[Tuple[int, int]] = None) -> np.ndarray:
+    """Normalise a SAM output mask to a boolean ndarray.
+    Objects exposing ``.cpu()`` (e.g. torch tensors) are converted through
+    ``.cpu().numpy()``; anything else goes through ``np.asarray``. Non-bool
+    data is thresholded at ``> 0``. When target_shape is given and differs
+    from the array's shape, the mask is resized with nearest-neighbour
+    interpolation.
+    """
+    arr = mask.cpu().numpy() if hasattr(mask, "cpu") else np.asarray(mask)
+    if arr.dtype != bool:
+        arr = arr > 0
+    if target_shape is None or arr.shape == target_shape:
+        return arr
+    # cv2.resize takes (width, height), the reverse of the (rows, cols) shape.
+    as_u8 = arr.astype(np.uint8) * 255
+    resized = cv2.resize(
+        as_u8, (target_shape[1], target_shape[0]), interpolation=cv2.INTER_NEAREST
+    )
+    return resized > 127
+def _score_card_mask(
+    mask: np.ndarray,
+    image_area: float,
+) -> Optional[Dict[str, Any]]:
+    """Score a candidate mask for being a credit card.
+    The mask passes through a series of rejection gates (area ratio, contour
+    presence, rectangularity, aspect ratio); the first failing gate returns
+    None. Survivors get a weighted score from aspect-ratio match,
+    rectangularity, and relative size.
+    Args:
+        mask: Candidate mask (presumably a boolean HxW array, so ``sum()`` is
+            the pixel count — TODO confirm against caller).
+        image_area: Area of the image the mask belongs to, in pixels.
+    Returns:
+        A dict with {corners, contour, width, height, area, aspect_ratio,
+        rectangularity, score, mask}, or None if the mask is rejected.
+    """
+    mask_u8 = mask.astype(np.uint8) * 255
+    mask_area = float(mask.sum())
+    # Gate 1: mask must occupy a plausible fraction of the image.
+    area_ratio = mask_area / image_area
+    if area_ratio < MIN_CARD_AREA_RATIO or area_ratio > MAX_CARD_AREA_RATIO:
+        return None
+    contours, _ = cv2.findContours(mask_u8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    if not contours:
+        return None
+    # Largest external contour is the card body (SAM masks can be slightly disconnected)
+    contour = max(contours, key=cv2.contourArea)
+    contour_area = cv2.contourArea(contour)
+    if contour_area <= 0:
+        return None
+    # Gate 2: contour must fill most of its minimum-area bounding rectangle.
+    rect = cv2.minAreaRect(contour)
+    box = cv2.boxPoints(rect)
+    rect_area = cv2.contourArea(box.astype(np.float32))
+    if rect_area <= 0:
+        return None
+    # Note: uses the largest contour's area, not the full mask area.
+    rectangularity = contour_area / rect_area
+    if rectangularity < MIN_RECTANGULARITY:
+        return None
+    corners = order_corners(box)
+    width, height = get_quad_dimensions(corners)
+    if width <= 0 or height <= 0:
+        return None
+    # Gate 3: long/short side ratio must be within tolerance of the card ratio
+    # (orientation-independent because of max/min).
+    aspect_ratio = max(width, height) / min(width, height)
+    ratio_diff = abs(aspect_ratio - CARD_ASPECT_RATIO) / CARD_ASPECT_RATIO
+    if ratio_diff > ASPECT_RATIO_TOLERANCE:
+        return None
+    # Higher score: better rectangularity + tighter aspect ratio match + meaningful size
+    ratio_score = 1.0 - ratio_diff / ASPECT_RATIO_TOLERANCE
+    rect_score = (rectangularity - MIN_RECTANGULARITY) / (1.0 - MIN_RECTANGULARITY)
+    area_score = min(area_ratio / 0.1, 1.0)  # caps at 10% of image area
+    score = 0.4 * ratio_score + 0.4 * rect_score + 0.2 * area_score
+    return {
+        "corners": corners,
+        "contour": contour,
+        "width": width,
+        "height": height,
+        "area": mask_area,
+        "aspect_ratio": aspect_ratio,
+        "rectangularity": rectangularity,
+        "score": score,
+        "mask": mask,
+    }
def _save_debug(
    debug_dir: str,
    image_bgr: np.ndarray,
    all_masks: List[np.ndarray],
    scored: List[Dict[str, Any]],
    best: Optional[Dict[str, Any]],
) -> None:
    """Write the AMG card-detection debug images into ``debug_dir``.

    Up to three PNGs are produced: every SAM mask blended over the image,
    every scored candidate quadrilateral with its score label, and (only
    when ``best`` is not None) the winning candidate.
    """
    out_dir = Path(debug_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    green = (0, 255, 0)

    # 01: blend each mask 50/50 with a pseudo-random colour. The generator is
    # seeded, so the colours are the same on every run.
    masks_view = image_bgr.copy()
    palette = np.random.default_rng(42)
    for region in all_masks:
        tint_color = np.array(palette.integers(64, 255, size=3).tolist())
        masks_view[region] = (0.5 * masks_view[region] + 0.5 * tint_color).astype(np.uint8)
    cv2.imwrite(str(out_dir / "01_all_sam_masks.png"), masks_view)

    # 02: outline every candidate that passed scoring and label it at its
    # first corner.
    candidates_view = image_bgr.copy()
    for cand in scored:
        quad = cand["corners"].astype(np.int32)
        caption = f"{cand['score']:.2f} ar={cand['aspect_ratio']:.3f}"
        cv2.polylines(candidates_view, [quad], True, green, 3)
        cv2.putText(
            candidates_view, caption, tuple(quad[0]),
            cv2.FONT_HERSHEY_SIMPLEX, 1.2, green, 3, cv2.LINE_AA,
        )
    cv2.imwrite(str(out_dir / "02_card_candidates.png"), candidates_view)

    if best is None:
        return

    # 03: winner only - green tint from the mask, outline, red corner dots.
    winner_view = image_bgr.copy()
    green_layer = np.zeros_like(winner_view)
    green_layer[:, :, 1] = best["mask"].astype(np.uint8) * 255
    winner_view = cv2.addWeighted(winner_view, 1.0, green_layer, 0.35, 0)
    quad = best["corners"].astype(np.int32)
    cv2.polylines(winner_view, [quad], True, green, 4)
    for vertex in quad:
        cv2.circle(winner_view, tuple(vertex), 10, (0, 0, 255), -1)
    caption = (
        f"SAM card  score={best['score']:.3f}  "
        f"ar={best['aspect_ratio']:.3f}  rect={best['rectangularity']:.3f}"
    )
    # Thick white pass first, thin green pass on top: outlined, readable text.
    for text_color, thickness in (((255, 255, 255), 5), (green, 2)):
        cv2.putText(winner_view, caption, (30, 60), cv2.FONT_HERSHEY_SIMPLEX, 1.1,
                    text_color, thickness, cv2.LINE_AA)
    cv2.imwrite(str(out_dir / "03_final_selection.png"), winner_view)
def detect_credit_card_sam(
    image: np.ndarray,
    debug_dir: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """Detect a credit card in the image using SAM 2.1 automatic mask generation.

    The image is downscaled for inference, every mask returned by the
    pipeline is resized back to the input resolution, and each one is scored
    with `_score_card_mask`. The highest-scoring survivor wins.

    Args:
        image: Input BGR image (any size)
        debug_dir: Optional directory to save debug visualizations

    Returns:
        Dict with keys ``corners``, ``contour`` (set to the same four corners),
        ``confidence``, ``width_px``, ``height_px``, ``aspect_ratio``, ``mask``
        (bool mask at input resolution) and ``mask_source`` (``"sam_amg"``),
        or None if no card-like mask was found.
    """
    from PIL import Image as PILImage
    if debug_dir:
        print(f"  SAM card detection debug → {debug_dir}")
    h, w = image.shape[:2]
    image_area = float(h * w)
    # The scale-back factor is not needed here: masks are resized straight to
    # (h, w) below, so scoring already happens in input-image coordinates.
    scaled_bgr, _ = _downscale_for_inference(image)
    scaled_rgb = cv2.cvtColor(scaled_bgr, cv2.COLOR_BGR2RGB)
    pil = PILImage.fromarray(scaled_rgb)
    pipe = _get_pipeline()
    t0 = time.time()
    output = pipe(
        pil,
        points_per_side=POINTS_PER_SIDE,
        points_per_batch=POINTS_PER_BATCH,
    )
    print(f"  SAM inference: {time.time() - t0:.1f}s → {len(output['masks'])} masks")
    # Upscale masks back to original resolution once, keep them for scoring + debug
    all_masks_full: List[np.ndarray] = [
        _mask_to_bool_array(m, target_shape=(h, w)) for m in output["masks"]
    ]
    results = (_score_card_mask(m, image_area) for m in all_masks_full)
    # Stable sort, best score first; rejected masks (None) are dropped.
    scored: List[Dict[str, Any]] = sorted(
        (r for r in results if r is not None),
        key=lambda d: d["score"],
        reverse=True,
    )
    best = scored[0] if scored else None
    if debug_dir:
        _save_debug(debug_dir, image, all_masks_full, scored, best)
    if best is None:
        print("  SAM: no card-like mask found")
        return None
    print(
        f"  SAM card: score={best['score']:.3f}, aspect={best['aspect_ratio']:.3f}, "
        f"rect={best['rectangularity']:.3f}, {best['width']:.0f}x{best['height']:.0f}px"
    )
    return {
        "corners": best["corners"],
        "contour": best["corners"],
        "confidence": float(best["score"]),
        "width_px": float(best["width"]),
        "height_px": float(best["height"]),
        "aspect_ratio": float(best["aspect_ratio"]),
        "mask": best["mask"],
        "mask_source": "sam_amg",
    }
+# ---------------------------------------------------------------------------
+# Prompt-based card detection (fast path)
+# ---------------------------------------------------------------------------
def suggest_card_seeds(
    hand_mask: np.ndarray,
    image_shape: Tuple[int, int],
    max_seeds: int = 25,
) -> List[Tuple[int, int]]:
    """Uniform 5x5 grid of seed points, with hand-mask points dropped.

    Lay out a 5x5 lattice over the inner core of the image (outer 20% margin
    trimmed on each axis because cards never sit flush against the image
    edge), then discard any point that falls inside the hand mask. Dense
    enough that at least one point reliably lands inside the credit card
    regardless of where it sits relative to the hand.

    Args:
        hand_mask: bool or uint8 hand mask, same shape as the canonical image.
        image_shape: (h, w) of the canonical image.
        max_seeds: hard cap on returned seeds (default 25 = full 5x5 grid).
            Values <= 0 yield an empty list.

    Returns:
        List of (x, y) pixel coordinates in the canonical image frame, in
        row-major order (top row first, left to right). Empty when the image
        has no pixels.
    """
    h, w = image_shape
    # Check the cap up front; checking only after an append would hand back
    # one seed even for max_seeds == 0. An empty image has nothing to seed.
    if max_seeds <= 0 or h <= 0 or w <= 0:
        return []
    mask_bool = hand_mask.astype(bool) if hand_mask.dtype != bool else hand_mask
    # 5x5 grid in [0.2, 0.8] × [0.2, 0.8] of the image, clamped to valid indices.
    fracs = (0.20, 0.35, 0.50, 0.65, 0.80)
    xs_grid = [max(0, min(w - 1, int(round(w * f)))) for f in fracs]
    ys_grid = [max(0, min(h - 1, int(round(h * f)))) for f in fracs]
    seeds: List[Tuple[int, int]] = []
    for py in ys_grid:
        for px in xs_grid:
            if mask_bool[py, px]:
                continue  # point lies on the hand
            seeds.append((px, py))
            if len(seeds) >= max_seeds:
                return seeds
    return seeds
def _downscale_prompt(image_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
    """Downscale for prompt inference so the long side is at most PROMPT_INFERENCE_MAX_SIDE.

    Images that already fit are returned as-is (same object, no copy).

    Args:
        image_bgr: Image array whose first two axes are (height, width).

    Returns:
        ``(scaled, scale_back)`` where multiplying scaled-image coordinates by
        ``scale_back`` gives original-image coordinates (1.0 when nothing was
        resized).
    """
    h, w = image_bgr.shape[:2]
    long_side = max(h, w)
    if long_side <= PROMPT_INFERENCE_MAX_SIDE:
        return image_bgr, 1.0
    scale = PROMPT_INFERENCE_MAX_SIDE / long_side
    # Clamp to at least 1 px: for extreme aspect ratios the short side would
    # otherwise round down to 0 and cv2.resize rejects an empty target size.
    new_w = max(1, int(round(w * scale)))
    new_h = max(1, int(round(h * scale)))
    scaled = cv2.resize(image_bgr, (new_w, new_h), interpolation=cv2.INTER_AREA)
    return scaled, 1.0 / scale
def _save_prompt_debug(
    debug_dir: str,
    image_bgr: np.ndarray,
    seeds: List[Tuple[int, int]],
    negatives: List[Tuple[int, int]],
    candidate_masks: List[np.ndarray],
    scored: List[Dict[str, Any]],
    best: Optional[Dict[str, Any]],
) -> None:
    """Write the prompt-based card-detection debug images into ``debug_dir``.

    Up to four PNGs are produced: the prompt points (green = positive seeds,
    red = negatives), all non-empty candidate masks blended over the image,
    the scored candidate quadrilaterals, and (only when ``best`` is not None)
    the winning candidate. All coordinates and masks must be in the frame of
    ``image_bgr``.
    """
    out_dir = Path(debug_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    green = (0, 255, 0)
    red = (0, 0, 255)
    black = (0, 0, 0)

    # 01: filled dot with a black rim for every prompt point.
    points_view = image_bgr.copy()
    for fill, points in ((green, seeds), (red, negatives)):
        for (x, y) in points:
            cv2.circle(points_view, (x, y), 20, fill, -1)
            cv2.circle(points_view, (x, y), 20, black, 3)
    cv2.imwrite(str(out_dir / "01_prompt_points.png"), points_view)

    # 02: 50/50 colour blend per candidate mask. Empty or missing masks are
    # skipped before a colour is drawn from the seeded generator.
    masks_view = image_bgr.copy()
    palette = np.random.default_rng(7)
    for region in candidate_masks:
        if region is None or region.sum() == 0:
            continue
        tint_color = np.array(palette.integers(64, 255, size=3).tolist())
        masks_view[region] = (0.5 * masks_view[region] + 0.5 * tint_color).astype(np.uint8)
    cv2.imwrite(str(out_dir / "02_candidate_masks.png"), masks_view)

    # 03: outline and label every candidate that passed scoring.
    scored_view = image_bgr.copy()
    for cand in scored:
        quad = cand["corners"].astype(np.int32)
        caption = f"{cand['score']:.2f} ar={cand['aspect_ratio']:.3f}"
        cv2.polylines(scored_view, [quad], True, green, 3)
        cv2.putText(
            scored_view, caption, tuple(quad[0]),
            cv2.FONT_HERSHEY_SIMPLEX, 1.2, green, 3, cv2.LINE_AA,
        )
    cv2.imwrite(str(out_dir / "03_scored.png"), scored_view)

    if best is None:
        return

    # 04: winner only - green tint from the mask, outline, red corner dots.
    winner_view = image_bgr.copy()
    green_layer = np.zeros_like(winner_view)
    green_layer[:, :, 1] = best["mask"].astype(np.uint8) * 255
    winner_view = cv2.addWeighted(winner_view, 1.0, green_layer, 0.35, 0)
    quad = best["corners"].astype(np.int32)
    cv2.polylines(winner_view, [quad], True, green, 4)
    for vertex in quad:
        cv2.circle(winner_view, tuple(vertex), 10, red, -1)
    caption = (
        f"SAM-prompt card  score={best['score']:.3f}  "
        f"ar={best['aspect_ratio']:.3f}  rect={best['rectangularity']:.3f}"
    )
    # Thick white pass first, thin green pass on top: outlined, readable text.
    for text_color, thickness in (((255, 255, 255), 5), (green, 2)):
        cv2.putText(winner_view, caption, (30, 60), cv2.FONT_HERSHEY_SIMPLEX, 1.1,
                    text_color, thickness, cv2.LINE_AA)
    cv2.imwrite(str(out_dir / "04_final_selection.png"), winner_view)
def detect_credit_card_sam_prompt(
    image: np.ndarray,
    seed_points: List[Tuple[int, int]],
    negative_points: Optional[List[Tuple[int, int]]] = None,
    debug_dir: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """Prompt-based SAM 2.1 credit card detection.

    For each seed point, runs a single-point SAM decoder pass with
    `multimask_output=True` and collects all returned masks. Every mask is
    then filtered through `_score_card_mask`; the highest-scoring survivor
    is returned. This is ~20× faster than the AMG path because it runs the
    decoder ~N times (one per seed) instead of 256 times on a dense grid.

    Scoring is done on the downscaled image; only the winner's mask, corners
    and dimensions are converted back to full resolution.

    Args:
        image: Full-resolution BGR image (canonical orientation).
        seed_points: List of (x, y) positive-point candidates. Each one is
            tried independently. A few well-placed candidates are enough.
        negative_points: Optional list of (x, y) negative points applied to
            every seed's prompt (e.g., palm center to steer SAM off the hand).
        debug_dir: Optional directory to dump debug visualizations.

    Returns:
        Dict with keys ``corners``, ``contour`` (set to the same four corners),
        ``confidence``, ``width_px``, ``height_px``, ``aspect_ratio``, ``mask``
        (bool, full resolution) and ``mask_source`` (``"sam_prompt"``), or
        None if there are no seeds or no seed produced a valid card mask.
    """
    import torch
    from PIL import Image as PILImage
    if not seed_points:
        print("  SAM-prompt: no seed points provided")
        return None
    h, w = image.shape[:2]
    scaled_bgr, scale_back = _downscale_prompt(image)
    scaled_rgb = cv2.cvtColor(scaled_bgr, cv2.COLOR_BGR2RGB)
    pil = PILImage.fromarray(scaled_rgb)
    scale_down = 1.0 / scale_back  # original → scaled

    def _to_scaled(pts: List[Tuple[int, int]]) -> List[List[int]]:
        return [[int(round(px * scale_down)), int(round(py * scale_down))] for px, py in pts]

    seeds_scaled = _to_scaled(seed_points)
    negatives_scaled = _to_scaled(negative_points) if negative_points else []
    # Build one prompt per seed; each prompt carries (1 positive + all negatives)
    # input_points shape: [batch=1, num_prompts, points_per_prompt, 2]
    # input_labels shape: [batch=1, num_prompts, points_per_prompt]
    input_points = [[[seed] + negatives_scaled for seed in seeds_scaled]]
    input_labels = [[[1] + [0] * len(negatives_scaled) for _ in seeds_scaled]]
    model, processor = get_sam2()
    t0 = time.time()
    inputs = processor(
        images=pil,
        input_points=input_points,
        input_labels=input_labels,
        return_tensors="pt",
    )
    with torch.inference_mode():
        # multimask_output=True gives 3 masks per seed (small / medium / large
        # disambiguation of the prompt). Empirically this matters for card
        # detection: SAM's single-best IoU mask sometimes latches onto a
        # sub-region or a nearby distractor, but one of the other two
        # candidates is the full card. Scoring cost is fine because we score
        # in the scaled 1024-space, not full resolution.
        outputs = model(**inputs, multimask_output=True)
    # Score masks in the scaled 1024-space. Only the single winner is
    # upscaled to full resolution afterward, which avoids O(N) 12 MP resizes.
    scaled_h = inputs["original_sizes"][0][0].item()
    scaled_w = inputs["original_sizes"][0][1].item()
    scaled_area = float(scaled_h * scaled_w)
    masks_list = processor.post_process_masks(
        outputs.pred_masks.cpu(),
        inputs["original_sizes"],
        mask_threshold=0.0,
    )
    masks_tensor = masks_list[0]  # (num_prompts, num_candidates, H_s, W_s)
    iou_scores = outputs.iou_scores.cpu().numpy()[0]
    infer_time = time.time() - t0
    scored: List[Dict[str, Any]] = []
    scaled_candidate_masks: List[np.ndarray] = []
    for prompt_idx in range(masks_tensor.shape[0]):
        for cand_idx in range(masks_tensor.shape[1]):
            mask_scaled = masks_tensor[prompt_idx, cand_idx].numpy().astype(bool)
            scaled_candidate_masks.append(mask_scaled)
            result = _score_card_mask(mask_scaled, scaled_area)
            if result is not None:
                result["seed_idx"] = prompt_idx
                result["cand_idx"] = cand_idx
                result["iou_score"] = float(iou_scores[prompt_idx, cand_idx])
                result["mask_scaled"] = mask_scaled
                scored.append(result)
    scored.sort(key=lambda d: d["score"], reverse=True)
    # Work on a shallow copy of the winner. Upscaling the entry inside
    # `scored` in place would leave one full-resolution record among the
    # scaled-space ones, and the debug overlay would draw it at the wrong place.
    best = dict(scored[0]) if scored else None
    # Upscale only the winning mask + corners to full resolution
    if best is not None:
        mask_scaled_best = best["mask_scaled"]
        if mask_scaled_best.shape != (h, w):
            mask_full = cv2.resize(
                mask_scaled_best.astype(np.uint8), (w, h),
                interpolation=cv2.INTER_NEAREST,
            ).astype(bool)
        else:
            mask_full = mask_scaled_best
        best["mask"] = mask_full
        best["corners"] = best["corners"] * scale_back
        best["width"] = best["width"] * scale_back
        best["height"] = best["height"] * scale_back
    print(
        f"  SAM-prompt: {len(seed_points)} seeds, "
        f"{masks_tensor.shape[0] * masks_tensor.shape[1]} candidates, "
        f"{len(scored)} passed filter, inference={infer_time:.2f}s"
    )
    if debug_dir:
        # Render debug overlays in the downscaled 1024-space. Upscaling
        # ~60 masks to full 12 MP resolution just for PNGs was dominating
        # end-to-end time (8–10s out of ~9s total). The debug images are
        # for human inspection; 1024 is plenty.
        # Draw the exact scaled-space points that were sent to SAM.
        debug_seeds = [(px, py) for px, py in seeds_scaled]
        debug_negs = [(px, py) for px, py in negatives_scaled]
        # Every entry of `scored` is still in scaled space (see copy above).
        debug_scored_for_viz = [dict(s, mask=s["mask_scaled"]) for s in scored]
        best_for_viz = debug_scored_for_viz[0] if debug_scored_for_viz else None
        _save_prompt_debug(
            debug_dir, scaled_bgr, debug_seeds, debug_negs,
            scaled_candidate_masks, debug_scored_for_viz, best_for_viz,
        )
    if best is None:
        return None
    print(
        f"  SAM-prompt card: score={best['score']:.3f}, "
        f"aspect={best['aspect_ratio']:.3f}, rect={best['rectangularity']:.3f}, "
        f"{best['width']:.0f}x{best['height']:.0f}px (seed {best['seed_idx']})"
    )
    return {
        "corners": best["corners"],
        "contour": best["corners"],
        "confidence": float(best["score"]),
        "width_px": float(best["width"]),
        "height_px": float(best["height"]),
        "aspect_ratio": float(best["aspect_ratio"]),
        "mask": best["mask"],  # bool HxW, canonical-image coords
        "mask_source": "sam_prompt",
    }

src/sam_hand_segmentation.py ADDED Viewed

	@@ -0,0 +1,158 @@

+"""
+SAM 2.1-based hand segmentation.
+Produces a pixel-accurate hand mask using Meta's Segment Anything 2.1
+(Hiera Tiny) via HuggingFace transformers, seeded by a positive point
+prompt at the palm center (derived from MediaPipe landmarks). Optional
+negative points can steer SAM away from the credit card.
+This replaces the synthetic convex-hull "mask" produced by
+`finger_segmentation._create_hand_mask()`, which is built from the
+21 hand landmarks and does not follow the true hand contour.
+Prompt-based inference: ~0.6s per call on CPU (vs ~18s for AMG).
+"""
+from __future__ import annotations
+import time
+from pathlib import Path
+from typing import List, Optional, Tuple
+import cv2
+import numpy as np
+from .sam_backend import INFERENCE_MAX_SIDE, get_sam2
def _downscale(image_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
    """Downscale so the long side is at most INFERENCE_MAX_SIDE.

    Images that already fit are returned as-is (same object, no copy).

    Args:
        image_bgr: Image array whose first two axes are (height, width).

    Returns:
        ``(scaled, scale_back)``. `scale_back` is the factor to multiply
        scaled coords by to get original coords (1.0 when nothing was resized).
    """
    h, w = image_bgr.shape[:2]
    long_side = max(h, w)
    if long_side <= INFERENCE_MAX_SIDE:
        return image_bgr, 1.0
    scale = INFERENCE_MAX_SIDE / long_side
    # Clamp to at least 1 px: for extreme aspect ratios the short side would
    # otherwise round down to 0 and cv2.resize rejects an empty target size.
    new_w = max(1, int(round(w * scale)))
    new_h = max(1, int(round(h * scale)))
    return cv2.resize(image_bgr, (new_w, new_h), interpolation=cv2.INTER_AREA), 1.0 / scale
def segment_hand_sam(
    image_bgr: np.ndarray,
    palm_xy: Tuple[int, int],
    negative_points: Optional[List[Tuple[int, int]]] = None,
    debug_dir: Optional[str] = None,
) -> Optional[np.ndarray]:
    """Return a pixel-accurate bool hand mask (H x W) via SAM 2.1 Tiny.

    The image is downscaled for inference, SAM is prompted with one positive
    point (the palm) plus any negative points, and of the candidate masks
    returned the one with the highest predicted IoU score is kept and
    resized back to the input resolution.

    Args:
        image_bgr: Full-resolution BGR image in the canonical orientation.
        palm_xy: (x, y) pixel coordinates of the palm center (positive prompt).
        negative_points: Optional list of (x, y) points to steer SAM away from
            non-hand regions (e.g., credit card center).
        debug_dir: Optional directory to save mask + overlay for inspection.

    Returns:
        Bool mask with shape `image_bgr.shape[:2]`. The current implementation
        has no code path that returns None; errors from the model or OpenCV
        propagate as exceptions.
    """
    import torch
    from PIL import Image as PILImage
    h_full, w_full = image_bgr.shape[:2]
    scaled_bgr, scale_back = _downscale(image_bgr)
    scaled_rgb = cv2.cvtColor(scaled_bgr, cv2.COLOR_BGR2RGB)
    pil = PILImage.fromarray(scaled_rgb)
    # Map prompt points into the scaled image space
    scale_down = 1.0 / scale_back  # original -> scaled
    palm_scaled = (int(round(palm_xy[0] * scale_down)), int(round(palm_xy[1] * scale_down)))
    # Positive palm point first (label 1), then every negative (label 0).
    prompt_points = [list(palm_scaled)]
    prompt_labels = [1]
    for nx, ny in (negative_points or []):
        prompt_points.append([int(round(nx * scale_down)), int(round(ny * scale_down))])
        prompt_labels.append(0)
    model, processor = get_sam2()
    t0 = time.time()
    inputs = processor(
        images=pil,
        input_points=[[prompt_points]],
        input_labels=[[prompt_labels]],
        return_tensors="pt",
    )
    with torch.inference_mode():
        outputs = model(**inputs, multimask_output=True)
    masks = processor.post_process_masks(
        outputs.pred_masks.cpu(),
        inputs["original_sizes"],
        mask_threshold=0.0,
    )[0][0]  # (num_candidates, H_scaled, W_scaled)
    scores = outputs.iou_scores.cpu().numpy()[0, 0]
    best_idx = int(np.argmax(scores))
    mask_scaled = masks[best_idx].numpy().astype(bool)
    best_score = float(scores[best_idx])
    infer_time = time.time() - t0
    # Upscale back to original resolution
    if mask_scaled.shape != (h_full, w_full):
        mask_full = cv2.resize(
            mask_scaled.astype(np.uint8),
            (w_full, h_full),
            interpolation=cv2.INTER_NEAREST,
        ).astype(bool)
    else:
        mask_full = mask_scaled
    print(
        f"  SAM hand mask: score={best_score:.3f} time={infer_time:.1f}s "
        f"area={int(mask_full.sum())}px"
    )
    if debug_dir:
        out_dir = Path(debug_dir)
        out_dir.mkdir(parents=True, exist_ok=True)
        mask_u8 = mask_full.astype(np.uint8) * 255
        # Raw mask
        cv2.imwrite(str(out_dir / "sam_hand_mask.png"), mask_u8)
        # Overlay with prompt points
        overlay = image_bgr.copy()
        tint = np.zeros_like(overlay)
        tint[mask_full] = (0, 255, 255)
        overlay = cv2.addWeighted(overlay, 1.0, tint, 0.35, 0)
        contours, _ = cv2.findContours(mask_u8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        cv2.drawContours(overlay, contours, -1, (0, 255, 255), 3, cv2.LINE_AA)
        # Coerce to a tuple of plain ints, as is done for the negative points,
        # so lists / numpy scalars / floats are all acceptable to cv2.circle.
        palm_pt = (int(round(palm_xy[0])), int(round(palm_xy[1])))
        cv2.circle(overlay, palm_pt, 20, (0, 255, 0), -1)
        cv2.circle(overlay, palm_pt, 20, (0, 0, 0), 3)
        for nx, ny in (negative_points or []):
            cv2.circle(overlay, (int(nx), int(ny)), 20, (0, 0, 255), -1)
            cv2.circle(overlay, (int(nx), int(ny)), 20, (0, 0, 0), 3)
        label = f"SAM hand  score={best_score:.2f}  {infer_time:.1f}s"
        # Thick white pass first, thin coloured pass on top: outlined text.
        cv2.putText(overlay, label, (30, 60), cv2.FONT_HERSHEY_SIMPLEX, 1.1,
                    (255, 255, 255), 5, cv2.LINE_AA)
        cv2.putText(overlay, label, (30, 60), cv2.FONT_HERSHEY_SIMPLEX, 1.1,
                    (0, 255, 255), 2, cv2.LINE_AA)
        cv2.imwrite(str(out_dir / "sam_hand_overlay.png"), overlay)
    return mask_full
def palm_center_from_landmarks(landmarks_px: np.ndarray) -> Tuple[int, int]:
    """Return (x, y) pixel coord of the palm center from the 21 MediaPipe landmarks.

    Defined as the mean of wrist (0) + four MCPs (5, 9, 13, 17).

    Args:
        landmarks_px: Array-like with one landmark per row and x, y in the
            first two columns; extra columns are ignored. Must have at least
            18 rows so that index 17 exists.

    Returns:
        ``(x, y)`` as plain Python ints, rounded to the nearest pixel.

    Raises:
        IndexError: If fewer than 18 landmarks or fewer than 2 columns are given.
    """
    # asarray lets nested lists / tuples work too; ndarrays pass through uncopied.
    pts = np.asarray(landmarks_px)
    idx = [0, 5, 9, 13, 17]
    center = np.mean(pts[idx, :2], axis=0)
    return (int(round(center[0])), int(round(center[1])))

web_demo/README.md CHANGED Viewed

@@ -25,4 +25,4 @@ Open `http://localhost:8000`.
 - Debug overlay auto-generated per request
 - Default guided sample image is at `web_demo/static/examples/default_sample.jpg`
 - `Start Measurement` uses the default sample image when no upload is selected
-- Web demo enforces Sobel edge refinement only (`edge_method=sobel`)

 - Debug overlay auto-generated per request
 - Default guided sample image is at `web_demo/static/examples/default_sample.jpg`
 - `Start Measurement` uses the default sample image when no upload is selected
+- Web demo enforces SAM-mask boundary edge detection only (`edge_method=mask`)

web_demo/app.py CHANGED Viewed

@@ -36,7 +36,9 @@ RESULTS_DIR = APP_ROOT / "results"
 DEFAULT_SAMPLE_PATH = APP_ROOT / "static" / "examples" / "default_sample.jpg"
 DEFAULT_SAMPLE_URL = "/static/examples/default_sample.jpg"
 ALLOWED_EXTENSIONS = {".jpg", ".jpeg", ".png"}
-DEMO_EDGE_METHOD = "sobel"
 app = Flask(__name__)
@@ -233,6 +235,8 @@ def _run_measurement(
         image=image,
         finger_index=finger_index,
         edge_method=DEMO_EDGE_METHOD,
         result_png_path=str(result_png_path),
         save_debug=False,
         ring_model=ring_model,
@@ -313,6 +317,8 @@ def _run_multi_measurement(
     result = measure_multi_finger(
         image=image,
         edge_method=DEMO_EDGE_METHOD,
         result_png_path=str(result_png_path),
         save_debug=False,
         no_calibration=False,

 DEFAULT_SAMPLE_PATH = APP_ROOT / "static" / "examples" / "default_sample.jpg"
 DEFAULT_SAMPLE_URL = "/static/examples/default_sample.jpg"
 ALLOWED_EXTENSIONS = {".jpg", ".jpeg", ".png"}
+DEMO_EDGE_METHOD = "mask"
+DEMO_CARD_METHOD = "sam"
+DEMO_HAND_MASK_METHOD = "sam"
 app = Flask(__name__)
         image=image,
         finger_index=finger_index,
         edge_method=DEMO_EDGE_METHOD,
+        card_method=DEMO_CARD_METHOD,
+        hand_mask_method=DEMO_HAND_MASK_METHOD,
         result_png_path=str(result_png_path),
         save_debug=False,
         ring_model=ring_model,
     result = measure_multi_finger(
         image=image,
         edge_method=DEMO_EDGE_METHOD,
+        card_method=DEMO_CARD_METHOD,
+        hand_mask_method=DEMO_HAND_MASK_METHOD,
         result_png_path=str(result_png_path),
         save_debug=False,
         no_calibration=False,

web_demo/static/app.js CHANGED Viewed

@@ -112,7 +112,7 @@ const buildMeasureSettings = () => {
   const aiOn = aiToggle ? (aiToggle.type === "checkbox" ? aiToggle.checked : true) : false;
   return {
     finger_index: fingerSelect ? fingerSelect.value : "index",
-    edge_method: "sobel",
     mode: mode,
     ring_model: ringModel,
     ai_explain: aiOn ? "1" : "0",

   const aiOn = aiToggle ? (aiToggle.type === "checkbox" ? aiToggle.checked : true) : false;
   return {
     finger_index: fingerSelect ? fingerSelect.value : "index",
+    edge_method: "mask",
     mode: mode,
     ring_model: ringModel,
     ai_explain: aiOn ? "1" : "0",

web_demo/supabase_client.py CHANGED Viewed

@@ -19,12 +19,24 @@ _initialized = False
 def _get_client():
-    """Lazy-init Supabase client. Returns None if env vars missing."""
     global _client, _initialized
     if _initialized:
         return _client
     _initialized = True
     url = os.environ.get("SUPABASE_URL", "").strip()
     key = os.environ.get("SUPABASE_SERVICE_KEY", "").strip()
     if not url or not key:

 def _get_client():
+    """Lazy-init Supabase client. Returns None if persistence is disabled.
+    Persistence is disabled when either:
+    - SUPABASE_URL / SUPABASE_SERVICE_KEY is missing, or
+    - RING_DISABLE_SUPABASE is set to a truthy value (explicit opt-out, so
+      local dev sessions don't upload photos + result PNGs to the real
+      bucket on every request).
+    """
     global _client, _initialized
     if _initialized:
         return _client
     _initialized = True
+    disable = os.environ.get("RING_DISABLE_SUPABASE", "").strip().lower()
+    if disable in ("1", "true", "yes", "on"):
+        logger.info("RING_DISABLE_SUPABASE set — persistence disabled")
+        return None
     url = os.environ.get("SUPABASE_URL", "").strip()
     key = os.environ.get("SUPABASE_SERVICE_KEY", "").strip()
     if not url or not key: