Spaces:

feng-x
/

ring-sizer

Running

App Files Files Community

feng-x committed on Apr 15

Commit

b96e083

verified ·

1 Parent(s): 4f1901d

Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

AGENTS.md +2 -1
CLAUDE.md +2 -1
README.md +4 -2
app.py +423 -0
measure_finger.py +125 -65
requirements.txt +3 -0
src/sam_backend.py +42 -6
src/sam_card_detection.py +11 -4
src/sam_hand_segmentation.py +4 -1

AGENTS.md CHANGED Viewed

@@ -21,7 +21,8 @@ For tasks of **reboot** from a new codex session:
 1. Read doc/v0/PRD.md, doc/v0/Plan.md, doc/v0/Progress.md for baseline implementation
 2. Read doc/v1/PRD.md, doc/v1/Plan.md, doc/v1/Progress.md for edge refinement (v1)
 3. Read doc/v4/PRD.md, doc/v4/Plan.md, doc/v4/Progress.md for SAM 2.1 integration (card + hand)
-4. Assume this is a continuation of an existing project.
 5. Summarize your understanding of the current state and propose the next concrete step without writing code yet.
 ## Project Overview

 1. Read doc/v0/PRD.md, doc/v0/Plan.md, doc/v0/Progress.md for baseline implementation
 2. Read doc/v1/PRD.md, doc/v1/Plan.md, doc/v1/Progress.md for edge refinement (v1)
 3. Read doc/v4/PRD.md, doc/v4/Plan.md, doc/v4/Progress.md for SAM 2.1 integration (card + hand)
+4. Read doc/v5/PRD.md, doc/v5/Plan.md, doc/v5/Progress.md for the Gradio/ZeroGPU deployment port
+5. Assume this is a continuation of an existing project.
 6. Summarize your understanding of the current state and propose the next concrete step without writing code yet.
 ## Project Overview

CLAUDE.md CHANGED Viewed

@@ -21,7 +21,8 @@ For tasks of **reboot** from a new codex session:
 1. Read doc/v0/PRD.md, doc/v0/Plan.md, doc/v0/Progress.md for baseline implementation
 2. Read doc/v1/PRD.md, doc/v1/Plan.md, doc/v1/Progress.md for edge refinement (v1)
 3. Read doc/v4/PRD.md, doc/v4/Plan.md, doc/v4/Progress.md for SAM 2.1 integration (card + hand)
-4. Assume this is a continuation of an existing project.
 5. Summarize your understanding of the current state and propose the next concrete step without writing code yet.
 ## Project Overview

 1. Read doc/v0/PRD.md, doc/v0/Plan.md, doc/v0/Progress.md for baseline implementation
 2. Read doc/v1/PRD.md, doc/v1/Plan.md, doc/v1/Progress.md for edge refinement (v1)
 3. Read doc/v4/PRD.md, doc/v4/Plan.md, doc/v4/Progress.md for SAM 2.1 integration (card + hand)
+4. Read doc/v5/PRD.md, doc/v5/Plan.md, doc/v5/Progress.md for the Gradio/ZeroGPU deployment port
+5. Assume this is a continuation of an existing project.
 6. Summarize your understanding of the current state and propose the next concrete step without writing code yet.
 ## Project Overview

README.md CHANGED Viewed

@@ -3,8 +3,10 @@ title: Ring Sizer
 emoji: "\U0001F48D"
 colorFrom: blue
 colorTo: purple
-sdk: docker
-app_port: 7860
 ---
 # Ring Sizer

 emoji: "\U0001F48D"
 colorFrom: blue
 colorTo: purple
+sdk: gradio
+sdk_version: 4.44.0
+app_file: app.py
+python_version: "3.10"
 ---
 # Ring Sizer

app.py ADDED Viewed

	@@ -0,0 +1,423 @@

+#!/usr/bin/env python3
+"""Gradio entry point for the Ring Sizer HuggingFace Space (v5).
+Public demo flow only: upload → measurement → result image + ring size
+summary + raw JSON. The Flask app in `web_demo/` is still used locally for
+admin / CSV / ground-truth editing, but HF Spaces now serves this Gradio
+app so the measurement call can run on ZeroGPU-backed H200 GPUs.
+See `doc/v5/` for the PRD, plan, and progress notes.
+"""
+from __future__ import annotations
+import logging
+import os
+import sys
+import tempfile
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+import cv2
+import numpy as np
+# `spaces` is a no-op outside HF ZeroGPU, so importing it unconditionally
+# keeps the local CPU path working without conditional imports.
+import spaces  # type: ignore
+import gradio as gr
+ROOT_DIR = Path(__file__).resolve().parent
+sys.path.insert(0, str(ROOT_DIR))
+from measure_finger import (  # noqa: E402
+    measure_finger,
+    measure_multi_finger,
+    apply_calibration,
+)
+from src.ring_size import (  # noqa: E402
+    recommend_ring_size,
+    VALID_RING_MODELS,
+    DEFAULT_RING_MODEL,
+)
+from src.ai_recommendation import ai_explain_recommendation  # noqa: E402
+from src.sam_backend import get_sam2  # noqa: E402
+# HF ZeroGPU docs: "models must be placed on cuda at the root module level"
+# (a PyTorch CUDA emulation mode is enabled outside @spaces.GPU functions,
+# so this runs cleanly both on ZeroGPU and CPU). Pre-loading here means the
+# first request does not pay the weight-to-GPU transfer cost.
+try:
+    get_sam2()
+except Exception as exc:  # noqa: BLE001
+    # Don't block app startup if SAM weights are missing — the measurement
+    # call will re-attempt and surface a clearer error to the user.
+    print(f"[v5] SAM preload skipped: {exc}")
+# Supabase persistence piggybacks on the same async executor pattern as the
+# Flask app so the GPU slice releases as soon as the measurement returns.
+try:
+    from web_demo.supabase_client import upload_file, save_measurement  # noqa: E402
+    _SUPABASE_AVAILABLE = True
+except Exception as exc:  # noqa: BLE001
+    print(f"[v5] Supabase client not importable ({exc}) — persistence disabled")
+    _SUPABASE_AVAILABLE = False
+logger = logging.getLogger(__name__)
+_persist_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="supa-persist")
+RESULTS_DIR = ROOT_DIR / "web_demo" / "results"
+UPLOADS_DIR = ROOT_DIR / "web_demo" / "uploads"
+RESULTS_DIR.mkdir(parents=True, exist_ok=True)
+UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
+DEMO_EDGE_METHOD = "mask"
+DEMO_CARD_METHOD = "sam"
+DEMO_HAND_MASK_METHOD = "sam"
+DEFAULT_SAMPLE_PATH = ROOT_DIR / "web_demo" / "static" / "examples" / "default_sample.jpg"
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
def _numpy_safe(obj: Any) -> Any:
    """Recursively convert numpy scalar/array types to native Python types.

    Gradio's JSON component calls `json.dumps` internally, which trips on
    `np.float32`, `np.bool_`, and friends. This mirrors the helper already
    used by the Flask app.

    Dict keys that are numpy scalars are converted as well, because
    `json.dumps` rejects an `np.int64` key just like it rejects an
    `np.int64` value. Keys of any other type are left untouched so they
    stay hashable.

    Args:
        obj: Arbitrary (possibly nested) value: dicts, lists, tuples, numpy
            scalars/arrays, or plain Python objects.

    Returns:
        A structure of the same shape in which dicts stay dicts, lists and
        tuples become lists, numpy arrays become nested lists, and numpy
        scalars become native Python scalars. Anything else is returned
        unchanged.
    """
    if isinstance(obj, dict):
        return {
            # Only numpy scalars are rewritten; recursing into keys could turn
            # a tuple key into an unhashable list.
            (k.item() if isinstance(k, np.generic) else k): _numpy_safe(v)
            for k, v in obj.items()
        }
    if isinstance(obj, (list, tuple)):
        return [_numpy_safe(v) for v in obj]
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.generic):
        # Remaining numpy scalar kinds (strings, datetimes, ...).
        return obj.item()
    return obj
def _make_base_name(kol_name: str) -> Tuple[str, str]:
    """Build a unique, filesystem-friendly base name for one measurement run.

    The name has the form ``<slug>_<YYYYmmdd-HHMMSS>_<run_id>``, where the
    slug is the lower-cased `kol_name` with every non-alphanumeric character
    replaced by ``-`` and leading/trailing dashes removed. An empty or
    missing name, or one with no alphanumeric characters, yields ``anon``.

    The slug is truncated because `kol_name` is free-form user input that
    ends up in file names; an unbounded slug can exceed the file-name length
    limit of the filesystem and make the later image write fail.

    Args:
        kol_name: Optional display name typed by the user (may be empty or
            ``None``).

    Returns:
        ``(base_name, run_id)`` where `run_id` is an 8-character hex string.
    """
    # 48 chars leaves room for the timestamp, run id and "_result.png" suffix
    # even when every slug character encodes to 4 bytes in UTF-8.
    max_slug_chars = 48

    run_id = uuid.uuid4().hex[:8]
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    raw_name = kol_name or "anon"
    slug = "".join(c if c.isalnum() else "-" for c in raw_name).strip("-").lower()
    # Strip again after truncating: the cut may land right after a dash.
    slug = slug[:max_slug_chars].strip("-") or "anon"
    return f"{slug}_{timestamp}_{run_id}", run_id
def _persist_async(
    *,
    upload_path: Optional[Path],
    upload_name: str,
    result_png_path: Path,
    result_png_name: str,
    record: Dict[str, Any],
) -> None:
    """Queue Supabase persistence (storage uploads + row insert) and return.

    The work is submitted to the module-level persist executor. Any failure
    inside the background task is logged and swallowed, so a broken Supabase
    connection never affects the measurement the user just saw. When the
    Supabase client could not be imported this is a no-op.

    Args:
        upload_path: Local path of the raw uploaded photo, or ``None``.
        upload_name: Object name used under ``photos/`` in storage.
        result_png_path: Local path of the rendered result image.
        result_png_name: Object name used under ``results/`` in storage.
        record: Row payload; ``photo_url`` and ``result_url`` are added to a
            copy before it is saved.
    """
    if not _SUPABASE_AVAILABLE:
        return

    def _upload_and_save() -> None:
        try:
            has_photo = upload_path is not None and upload_path.exists()
            photo_url = (
                upload_file(str(upload_path), f"photos/{upload_name}")
                if has_photo
                else None
            )
            result_url = (
                upload_file(str(result_png_path), f"results/{result_png_name}")
                if result_png_path.exists()
                else None
            )
            # Work on a copy so the caller's record is never mutated.
            save_measurement(
                {**record, "photo_url": photo_url, "result_url": result_url}
            )
        except Exception as exc:  # noqa: BLE001
            logger.exception("Supabase persist failed for run %s: %s",
                             record.get("run_id"), exc)

    _persist_executor.submit(_upload_and_save)
def _format_summary(result: Dict[str, Any], mode: str) -> str:
    """Render a human-readable markdown summary above the raw JSON.

    The summary is cosmetic, so it must not raise on partially filled
    results: a missing diameter is shown as ``n/a``, a missing or null
    ``range`` is shown as ``None–None``, and a null per-finger entry is
    reported as a failed finger.

    Args:
        result: Measurement result dict (already converted to native types).
        mode: ``"multi"`` for the multi-finger layout; any other value uses
            the single-finger layout.

    Returns:
        Markdown text. If `result` carries a truthy ``fail_reason`` only the
        failure line is returned.
    """
    if result.get("fail_reason"):
        return f"**Measurement failed:** `{result['fail_reason']}`"

    if mode == "multi":
        lines = ["### Multi-finger result"]
        per_finger = result.get("per_finger") or {}
        for fn in ("index", "middle", "ring"):
            # `or {}` also covers an explicit null entry for this finger.
            pf = per_finger.get(fn) or {}
            label = fn.capitalize()
            if pf.get("status") == "ok":
                diam = pf.get("diameter_cm")
                diam_text = f"{diam:.2f} cm" if diam is not None else "n/a"
                # `range` may be absent or null; both fall back to (None, None).
                rng = pf.get("range") or (None, None)
                lines.append(
                    f"- **{label}:** {diam_text} → "
                    f"size **{pf.get('best_match')}** (range {rng[0]}–{rng[1]})"
                )
            else:
                lines.append(f"- **{label}:** failed ({pf.get('fail_reason', 'unknown')})")
        if result.get("overall_best_size") is not None:
            lines.append("")
            lines.append(
                f"**Recommended size:** **{result['overall_best_size']}** "
                f"(range {result.get('overall_range_min')}–{result.get('overall_range_max')})"
            )
        if result.get("ai_explanation"):
            lines.append("")
            lines.append(f"**Why:** {result['ai_explanation']}")
        return "\n".join(lines)

    # Single finger
    diam = result.get("finger_outer_diameter_cm")
    conf = result.get("confidence")
    ring = result.get("ring_size") or {}
    lines = ["### Single-finger result"]
    if diam is not None:
        lines.append(f"- **Diameter:** {diam:.2f} cm")
    if result.get("raw_diameter_cm") is not None:
        lines.append(f"- **Raw (uncalibrated):** {result['raw_diameter_cm']:.2f} cm")
    if conf is not None:
        lines.append(f"- **Confidence:** {conf:.2f}")
    if ring:
        lines.append(
            f"- **Ring size:** **{ring.get('best_match')}** "
            f"(range {ring.get('range_min')}–{ring.get('range_max')})"
        )
    return "\n".join(lines)
+# ---------------------------------------------------------------------------
+# Measurement handler
+# ---------------------------------------------------------------------------
@spaces.GPU(duration=60)
def run_measurement(
    image: Optional[np.ndarray],
    finger_index: str,
    mode: str,
    ring_model: str,
    kol_name: str,
    ai_explain: bool,
) -> Tuple[Optional[np.ndarray], Dict[str, Any], str]:
    """Run the measurement pipeline and return (overlay, json, summary).

    Decorated with `@spaces.GPU` so the call is eligible for a ZeroGPU slice
    on HF Spaces; per the module notes the decorator is a no-op elsewhere.

    Args:
        image: RGB image array from Gradio, or ``None`` when nothing was
            uploaded.
        finger_index: Finger to measure in single mode.
        mode: ``"multi"`` runs the multi-finger pipeline; any other value
            runs the single-finger pipeline.
        ring_model: Ring model name; unknown values fall back to
            ``DEFAULT_RING_MODEL``.
        kol_name: Optional user name, used for file naming and persistence.
        ai_explain: Whether to request an AI explanation (multi mode only).

    Returns:
        ``(overlay_rgb, result, summary)``: the rendered overlay as an RGB
        array (``None`` if no result image was written), the JSON-safe
        result dict, and a markdown summary.
    """
    if image is None:
        return None, {"error": "No image uploaded"}, "**Error:** please upload an image."

    if ring_model not in VALID_RING_MODELS:
        ring_model = DEFAULT_RING_MODEL

    # Gradio gives us an RGB numpy array; the rest of the pipeline expects BGR.
    is_three_channel = image.ndim == 3 and image.shape[2] == 3
    image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) if is_three_channel else image

    base_name, run_id = _make_base_name(kol_name)
    result_png_name = f"{base_name}_result.png"
    result_png_path = RESULTS_DIR / result_png_name

    # Also save the raw upload so Supabase persistence has something to push.
    upload_name = f"{base_name}.jpg"
    upload_path = UPLOADS_DIR / upload_name
    cv2.imwrite(str(upload_path), image_bgr)

    # Keyword arguments shared by both pipeline entry points.
    pipeline_kwargs: Dict[str, Any] = {
        "image": image_bgr,
        "edge_method": DEMO_EDGE_METHOD,
        "card_method": DEMO_CARD_METHOD,
        "hand_mask_method": DEMO_HAND_MASK_METHOD,
        "result_png_path": str(result_png_path),
        "save_debug": False,
        "ring_model": ring_model,
    }
    is_multi = mode == "multi"
    record: Dict[str, Any] = {
        "run_id": run_id,
        "kol_name": kol_name,
        "mode": "multi" if is_multi else "single",
        "ring_model": ring_model,
    }

    if is_multi:
        result = _numpy_safe(
            measure_multi_finger(no_calibration=False, **pipeline_kwargs)
        )
        per_finger = result.get("per_finger", {})

        # Only fingers that measured successfully contribute a width.
        finger_widths: Dict[str, Any] = {}
        for fn, pf in per_finger.items():
            finger_widths[fn] = pf.get("diameter_cm") if pf.get("status") == "ok" else None

        if ai_explain and result.get("overall_best_size") is not None:
            ai_reason = ai_explain_recommendation(
                finger_widths,
                recommended_size=result["overall_best_size"],
                range_min=result["overall_range_min"],
                range_max=result["overall_range_max"],
                ring_model=ring_model,
            )
            if ai_reason:
                result["ai_explanation"] = ai_reason

        # Overall confidence is the weakest confidence among the OK fingers.
        ok_confidences = []
        for pf in per_finger.values():
            if pf.get("status") == "ok" and pf.get("confidence") is not None:
                ok_confidences.append(pf.get("confidence"))
        overall_confidence = min(ok_confidences) if ok_confidences else None

        record.update({
            "overall_best_size": result.get("overall_best_size"),
            "overall_range_min": result.get("overall_range_min"),
            "overall_range_max": result.get("overall_range_max"),
            "per_finger": per_finger,
            "confidence": overall_confidence,
            "result_json": result,
            "fail_reason": result.get("fail_reason"),
        })
    else:
        result = measure_finger(finger_index=finger_index, **pipeline_kwargs)

        raw_diameter = result.get("finger_outer_diameter_cm")
        if raw_diameter is not None:
            # Keep the raw value alongside the calibrated one, then size the
            # ring from the calibrated diameter.
            result["raw_diameter_cm"] = round(raw_diameter, 4)
            calibrated = round(apply_calibration(raw_diameter), 4)
            result["finger_outer_diameter_cm"] = calibrated
            result["calibration_applied"] = True
            rec = recommend_ring_size(calibrated, ring_model=ring_model)
            if rec:
                result["ring_size"] = rec

        result = _numpy_safe(result)
        ring_size = result.get("ring_size", {}) or {}

        record.update({
            "finger_index": finger_index,
            "diameter_cm": result.get("finger_outer_diameter_cm"),
            "confidence": result.get("confidence"),
            "overall_best_size": ring_size.get("best_match"),
            "overall_range_min": ring_size.get("range_min"),
            "overall_range_max": ring_size.get("range_max"),
            "result_json": result,
            "fail_reason": result.get("fail_reason"),
        })

    # Hand persistence to the background executor; failures there are logged
    # and never reach the user.
    _persist_async(
        upload_path=upload_path,
        upload_name=upload_name,
        result_png_path=result_png_path,
        result_png_name=result_png_name,
        record=record,
    )

    # Load the overlay image Gradio will display.
    overlay_rgb: Optional[np.ndarray] = None
    if result_png_path.exists():
        overlay_bgr = cv2.imread(str(result_png_path))
        if overlay_bgr is not None:
            overlay_rgb = cv2.cvtColor(overlay_bgr, cv2.COLOR_BGR2RGB)

    return overlay_rgb, result, _format_summary(result, mode)
+# ---------------------------------------------------------------------------
+# UI
+# ---------------------------------------------------------------------------
+_DESCRIPTION = """
+Upload a single photo with **one hand and a credit card on the same flat
+surface**. The app detects the card (for scale), segments the hand, and
+measures the outer diameter of the chosen finger at the ring-wearing zone.
+"""
+_EXAMPLES: List[List[Any]] = []
+if DEFAULT_SAMPLE_PATH.exists():
+    _EXAMPLES.append([str(DEFAULT_SAMPLE_PATH), "index", "single", DEFAULT_RING_MODEL, "", False])
def build_demo() -> gr.Blocks:
    """Assemble the Gradio Blocks UI for the public demo.

    Layout: a title and description, then a two-column row with the inputs
    and the "Measure" button on the left and the overlay image, markdown
    summary and raw JSON on the right. The button is wired to
    `run_measurement`. An examples panel is added only when `_EXAMPLES` is
    non-empty.

    Returns:
        The constructed (not yet launched) `gr.Blocks` app.
    """
    with gr.Blocks(title="Ring Sizer") as demo:
        gr.Markdown("# 💍 Ring Sizer")
        gr.Markdown(_DESCRIPTION)

        with gr.Row():
            # Left column: inputs and the trigger button.
            with gr.Column(scale=1):
                photo = gr.Image(
                    type="numpy",
                    label="Hand + credit card photo",
                    sources=["upload", "webcam"],
                )
                finger_choice = gr.Dropdown(
                    choices=["index", "middle", "ring"],
                    value="index",
                    label="Finger",
                )
                mode_choice = gr.Radio(
                    choices=["single", "multi"],
                    value="single",
                    label="Mode",
                    info="`single` measures one finger; `multi` measures index + middle + ring and aggregates.",
                )
                ring_model_choice = gr.Dropdown(
                    choices=list(VALID_RING_MODELS),
                    value=DEFAULT_RING_MODEL,
                    label="Ring model",
                )
                name_box = gr.Textbox(label="Name (optional)", placeholder="")
                explain_toggle = gr.Checkbox(label="Explain recommendation (AI)", value=False)
                measure_button = gr.Button("Measure", variant="primary")

            # Right column: outputs.
            with gr.Column(scale=1):
                overlay_view = gr.Image(label="Measurement overlay")
                summary_view = gr.Markdown(label="Summary")
                raw_view = gr.JSON(label="Raw result")

        # Same component order as the `run_measurement` parameters.
        controls = (
            photo,
            finger_choice,
            mode_choice,
            ring_model_choice,
            name_box,
            explain_toggle,
        )

        measure_button.click(
            fn=run_measurement,
            inputs=list(controls),
            outputs=[overlay_view, raw_view, summary_view],
        )

        if _EXAMPLES:
            gr.Examples(
                examples=_EXAMPLES,
                inputs=list(controls),
                label="Try the default sample",
            )

    return demo
+demo = build_demo()
+if __name__ == "__main__":
+    demo.queue().launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", "7860")))

measure_finger.py CHANGED Viewed

@@ -12,9 +12,39 @@ Usage:
 import argparse
 import json
 import sys
 from pathlib import Path
 from typing import Optional, Dict, Any, List, Literal, Tuple
 import cv2
 import numpy as np
@@ -580,8 +610,12 @@ def measure_finger(
     Returns:
         Output dictionary with measurement results
     """
     # Phase 2: Image quality metrics (informational only — no hard fail)
-    quality = assess_image_quality(image)
     print(f"Image quality: blur={quality['blur_score']:.1f}, "
           f"brightness={quality['brightness']:.1f}, "
           f"contrast={quality['contrast']:.1f}")
@@ -596,12 +630,13 @@ def measure_finger(
     if save_debug and result_png_path is not None:
         finger_debug_dir = str(Path(result_png_path).parent / "finger_segmentation_debug")
-    hand_data = segment_hand(
-        image,
-        finger=finger_index,
-        debug_dir=finger_debug_dir,
-        use_sam_mask=(hand_mask_method == "sam"),
-    )
     if hand_data is None:
         print("No hand detected in image")
@@ -639,12 +674,13 @@ def measure_finger(
         view_angle_ok = True
         card_detected = False
     else:
-        if card_method == "sam":
-            card_result = _sam_card_detect(
-                image_canonical, hand_data, save_debug, result_png_path
-            )
-        else:
-            card_result = detect_credit_card(image_canonical, debug_dir=card_debug_dir)
         if card_result is None:
             print("Credit card not detected in image")
@@ -682,7 +718,8 @@ def measure_finger(
     # length and can cut into a wider-than-average finger, which would make
     # the mask boundary narrower than the true SAM boundary.
     raw_hand_mask = hand_data.get("mask")
-    finger_data = isolate_finger(hand_data, finger=finger_index, image_shape=(h_can, w_can))
     if finger_data is None:
         print(f"Could not isolate finger: {finger_index}")
@@ -889,20 +926,21 @@ def measure_finger(
             else:
                 edge_mask_input = cleaned_mask
-            sobel_measurement = refine_edges_sobel(
-                image=image_canonical,  # Use canonical orientation
-                axis_data=axis_data,
-                zone_data=zone_data,
-                scale_px_per_cm=px_per_cm,
-                finger_landmarks=finger_data.get("landmarks"),
-                sobel_threshold=sobel_threshold,
-                kernel_size=sobel_kernel_size,
-                use_subpixel=use_subpixel,
-                finger_mask=edge_mask_input,
-                debug_dir=edge_debug_dir,
-                mask_mode=mask_mode,
-                finger_name=finger_data.get("finger_name"),
-            )
             sobel_width_cm = sobel_measurement["median_width_cm"]
             print(f"Edge width: {sobel_width_cm:.4f}cm "
@@ -1057,6 +1095,7 @@ def measure_finger(
         print(f"Warning: Confidence {confidence_breakdown['overall']:.3f} is below threshold {confidence_threshold:.3f}")
     # Phase 9: Result visualization (always generated)
     if result_png_path is not None:
         print(f"Generating result visualization...")
@@ -1141,6 +1180,14 @@ def measure_finger(
         _save_debug_visualization(result_png_path, debug_image)
         print(f"Result visualization saved to: {result_png_path}")
     return create_output(
         finger_diameter_cm=median_width_cm,
@@ -1410,8 +1457,12 @@ def measure_multi_finger(
     """
     from src.finger_segmentation import FINGER_LANDMARKS
     # Phase 1: Image quality metrics (informational only — no hard fail)
-    quality = assess_image_quality(image)
     print(f"[multi] Image quality: blur={quality['blur_score']:.1f}, "
           f"brightness={quality['brightness']:.1f}, contrast={quality['contrast']:.1f}")
     if not quality["passed"]:
@@ -1428,12 +1479,13 @@ def measure_multi_finger(
     if save_debug and result_png_path is not None:
         finger_debug_dir = str(Path(result_png_path).parent / "finger_segmentation_debug")
-    hand_data = segment_hand(
-        image,
-        finger="index",
-        debug_dir=finger_debug_dir,
-        use_sam_mask=(hand_mask_method == "sam"),
-    )
     if hand_data is None:
         print("[multi] No hand detected")
         return {"fail_reason": "hand_not_detected", "per_finger": {}, "fingers_measured": 0, "fingers_succeeded": 0}
@@ -1453,12 +1505,13 @@ def measure_multi_finger(
         view_angle_ok = True
         card_detected = False
     else:
-        if card_method == "sam":
-            card_result = _sam_card_detect(
-                image_canonical, hand_data, save_debug, result_png_path
-            )
-        else:
-            card_result = detect_credit_card(image_canonical, debug_dir=card_debug_dir)
         if card_result is None:
             # Emit a diagnostic visualization so the failure is debuggable:
             # hand mask + card-prompt seeds on the canonical image. Without
@@ -1502,20 +1555,21 @@ def measure_multi_finger(
     per_finger_raw: Dict[str, Dict] = {}
     for fn in MULTI_FINGERS:
         print(f"\n[multi] === Measuring {fn} finger ===")
-        result = _measure_single_finger_from_shared(
-            image_canonical=image_canonical,
-            hand_data=hand_data,
-            finger_name=fn,
-            px_per_cm=px_per_cm,
-            card_detected=card_detected,
-            view_angle_ok=view_angle_ok,
-            card_result=card_result,
-            scale_confidence=scale_confidence,
-            edge_method=edge_method,
-            sobel_threshold=sobel_threshold,
-            sobel_kernel_size=sobel_kernel_size,
-            use_subpixel=use_subpixel,
-        )
         # Apply calibration
         raw_diam = result.get("finger_outer_diameter_cm")
@@ -1541,16 +1595,17 @@ def measure_multi_finger(
     # Build debug visualization
     if result_png_path is not None:
-        _draw_multi_finger_debug(
-            image_canonical=image_canonical,
-            per_finger_raw=per_finger_raw,
-            aggregated=aggregated,
-            card_result=card_result,
-            px_per_cm=px_per_cm,
-            result_png_path=result_png_path,
-            hand_mask=hand_data.get("mask") if hand_data else None,
-            hand_landmarks=hand_data.get("landmarks") if hand_data else None,
-        )
     # Clean internal data from output
     for fn, r in per_finger_raw.items():
@@ -1563,6 +1618,11 @@ def measure_multi_finger(
         "lighting_uniform": lighting.get("uniform", True),
         "fingers_well_spaced": spacing.get("well_spaced", True),
     }
     return aggregated

 import argparse
 import json
 import sys
+import time
+from contextlib import contextmanager
 from pathlib import Path
 from typing import Optional, Dict, Any, List, Literal, Tuple
@contextmanager
def _phase(name: str, totals: Optional[Dict[str, float]] = None):
    """Context manager that times one pipeline phase using wall-clock time.

    On exit, including exit via an exception, it prints
    `[timing] <name>: <ms> ms`. When `totals` is supplied, the elapsed
    milliseconds are added to `totals[name]` so repeated phases accumulate
    and the caller can print a summary afterwards.

    Args:
        name: Label for the phase, also used as the key in `totals`.
        totals: Optional dict mapping phase name to accumulated milliseconds.
    """
    started = time.perf_counter()

    def _report() -> None:
        elapsed_ms = 1000.0 * (time.perf_counter() - started)
        print(f"[timing] {name}: {elapsed_ms:.1f} ms")
        if totals is not None:
            totals[name] = elapsed_ms + totals.get(name, 0.0)

    try:
        yield
    finally:
        # Runs on both the normal and the exceptional path.
        _report()
def _print_timing_summary(totals: Dict[str, float]) -> None:
    """Print a per-phase timing table, slowest phase first.

    Prints nothing when `totals` is empty. Otherwise prints a header with the
    summed time, then one row per phase with its milliseconds and its share
    of the total (0.0% when the total is not positive).

    Args:
        totals: Mapping of phase name to accumulated milliseconds.
    """
    if len(totals) == 0:
        return

    grand_total = sum(totals.values())
    print(f"[timing] ===== summary (total {grand_total:.1f} ms) =====")

    # Stable descending sort: phases with equal time keep insertion order.
    ranked = sorted(totals.items(), key=lambda item: item[1], reverse=True)
    for phase, elapsed in ranked:
        if grand_total > 0:
            share = elapsed / grand_total * 100.0
        else:
            share = 0.0
        print(f"[timing]   {phase:<28s} {elapsed:8.1f} ms  ({share:5.1f}%)")
 import cv2
 import numpy as np
     Returns:
         Output dictionary with measurement results
     """
+    timings: Dict[str, float] = {}
+    t_pipeline_start = time.perf_counter()
     # Phase 2: Image quality metrics (informational only — no hard fail)
+    with _phase("image_quality", timings):
+        quality = assess_image_quality(image)
     print(f"Image quality: blur={quality['blur_score']:.1f}, "
           f"brightness={quality['brightness']:.1f}, "
           f"contrast={quality['contrast']:.1f}")
     if save_debug and result_png_path is not None:
         finger_debug_dir = str(Path(result_png_path).parent / "finger_segmentation_debug")
+    with _phase(f"hand_segment[{hand_mask_method}]", timings):
+        hand_data = segment_hand(
+            image,
+            finger=finger_index,
+            debug_dir=finger_debug_dir,
+            use_sam_mask=(hand_mask_method == "sam"),
+        )
     if hand_data is None:
         print("No hand detected in image")
         view_angle_ok = True
         card_detected = False
     else:
+        with _phase(f"card_detect[{card_method}]", timings):
+            if card_method == "sam":
+                card_result = _sam_card_detect(
+                    image_canonical, hand_data, save_debug, result_png_path
+                )
+            else:
+                card_result = detect_credit_card(image_canonical, debug_dir=card_debug_dir)
         if card_result is None:
             print("Credit card not detected in image")
     # length and can cut into a wider-than-average finger, which would make
     # the mask boundary narrower than the true SAM boundary.
     raw_hand_mask = hand_data.get("mask")
+    with _phase("finger_isolate", timings):
+        finger_data = isolate_finger(hand_data, finger=finger_index, image_shape=(h_can, w_can))
     if finger_data is None:
         print(f"Could not isolate finger: {finger_index}")
             else:
                 edge_mask_input = cleaned_mask
+            with _phase(f"edge_refine[{mask_mode}]", timings):
+                sobel_measurement = refine_edges_sobel(
+                    image=image_canonical,  # Use canonical orientation
+                    axis_data=axis_data,
+                    zone_data=zone_data,
+                    scale_px_per_cm=px_per_cm,
+                    finger_landmarks=finger_data.get("landmarks"),
+                    sobel_threshold=sobel_threshold,
+                    kernel_size=sobel_kernel_size,
+                    use_subpixel=use_subpixel,
+                    finger_mask=edge_mask_input,
+                    debug_dir=edge_debug_dir,
+                    mask_mode=mask_mode,
+                    finger_name=finger_data.get("finger_name"),
+                )
             sobel_width_cm = sobel_measurement["median_width_cm"]
             print(f"Edge width: {sobel_width_cm:.4f}cm "
         print(f"Warning: Confidence {confidence_breakdown['overall']:.3f} is below threshold {confidence_threshold:.3f}")
     # Phase 9: Result visualization (always generated)
+    t_viz_start = time.perf_counter() if result_png_path is not None else None
     if result_png_path is not None:
         print(f"Generating result visualization...")
         _save_debug_visualization(result_png_path, debug_image)
         print(f"Result visualization saved to: {result_png_path}")
+    if t_viz_start is not None:
+        viz_ms = (time.perf_counter() - t_viz_start) * 1000.0
+        print(f"[timing] visualization: {viz_ms:.1f} ms")
+        timings["visualization"] = timings.get("visualization", 0.0) + viz_ms
+    pipeline_ms = (time.perf_counter() - t_pipeline_start) * 1000.0
+    print(f"[timing] pipeline_total: {pipeline_ms:.1f} ms")
+    _print_timing_summary(timings)
     return create_output(
         finger_diameter_cm=median_width_cm,
     """
     from src.finger_segmentation import FINGER_LANDMARKS
+    timings: Dict[str, float] = {}
+    t_pipeline_start = time.perf_counter()
     # Phase 1: Image quality metrics (informational only — no hard fail)
+    with _phase("image_quality", timings):
+        quality = assess_image_quality(image)
     print(f"[multi] Image quality: blur={quality['blur_score']:.1f}, "
           f"brightness={quality['brightness']:.1f}, contrast={quality['contrast']:.1f}")
     if not quality["passed"]:
     if save_debug and result_png_path is not None:
         finger_debug_dir = str(Path(result_png_path).parent / "finger_segmentation_debug")
+    with _phase(f"hand_segment[{hand_mask_method}]", timings):
+        hand_data = segment_hand(
+            image,
+            finger="index",
+            debug_dir=finger_debug_dir,
+            use_sam_mask=(hand_mask_method == "sam"),
+        )
     if hand_data is None:
         print("[multi] No hand detected")
         return {"fail_reason": "hand_not_detected", "per_finger": {}, "fingers_measured": 0, "fingers_succeeded": 0}
         view_angle_ok = True
         card_detected = False
     else:
+        with _phase(f"card_detect[{card_method}]", timings):
+            if card_method == "sam":
+                card_result = _sam_card_detect(
+                    image_canonical, hand_data, save_debug, result_png_path
+                )
+            else:
+                card_result = detect_credit_card(image_canonical, debug_dir=card_debug_dir)
         if card_result is None:
             # Emit a diagnostic visualization so the failure is debuggable:
             # hand mask + card-prompt seeds on the canonical image. Without
     per_finger_raw: Dict[str, Dict] = {}
     for fn in MULTI_FINGERS:
         print(f"\n[multi] === Measuring {fn} finger ===")
+        with _phase(f"measure_finger[{fn}]", timings):
+            result = _measure_single_finger_from_shared(
+                image_canonical=image_canonical,
+                hand_data=hand_data,
+                finger_name=fn,
+                px_per_cm=px_per_cm,
+                card_detected=card_detected,
+                view_angle_ok=view_angle_ok,
+                card_result=card_result,
+                scale_confidence=scale_confidence,
+                edge_method=edge_method,
+                sobel_threshold=sobel_threshold,
+                sobel_kernel_size=sobel_kernel_size,
+                use_subpixel=use_subpixel,
+            )
         # Apply calibration
         raw_diam = result.get("finger_outer_diameter_cm")
     # Build debug visualization
     if result_png_path is not None:
+        with _phase("visualization", timings):
+            _draw_multi_finger_debug(
+                image_canonical=image_canonical,
+                per_finger_raw=per_finger_raw,
+                aggregated=aggregated,
+                card_result=card_result,
+                px_per_cm=px_per_cm,
+                result_png_path=result_png_path,
+                hand_mask=hand_data.get("mask") if hand_data else None,
+                hand_landmarks=hand_data.get("landmarks") if hand_data else None,
+            )
     # Clean internal data from output
     for fn, r in per_finger_raw.items():
         "lighting_uniform": lighting.get("uniform", True),
         "fingers_well_spaced": spacing.get("well_spaced", True),
     }
+    pipeline_ms = (time.perf_counter() - t_pipeline_start) * 1000.0
+    print(f"[timing] pipeline_total: {pipeline_ms:.1f} ms")
+    _print_timing_summary(timings)
     return aggregated

requirements.txt CHANGED Viewed

@@ -12,3 +12,6 @@ torch>=2.4.0
 torchvision>=0.19.0
 transformers>=4.47.0
 pillow>=10.0.0

 torchvision>=0.19.0
 transformers>=4.47.0
 pillow>=10.0.0
+# v5: HF ZeroGPU requires Gradio SDK; `spaces` provides @spaces.GPU (no-op off ZeroGPU)
+gradio>=4.44.0
+spaces>=0.30.0

src/sam_backend.py CHANGED Viewed

@@ -23,6 +23,35 @@ INFERENCE_MAX_SIDE = 1024
 _model = None
 _processor = None
 def get_sam2() -> Tuple[object, object]:
@@ -32,19 +61,26 @@ def get_sam2() -> Tuple[object, object]:
     the HEAD-request retry storm that happens when huggingface.co is slow or
     unreachable but the weights are already on disk. On a true cache miss we
     fall through to a normal online load.
     """
-    global _model, _processor
     if _model is None or _processor is None:
         from transformers import Sam2Model, Sam2Processor
         t0 = time.time()
-        print(f"  Loading SAM 2.1 ({SAM2_MODEL_ID})...")
         try:
             _processor = Sam2Processor.from_pretrained(SAM2_MODEL_ID, local_files_only=True)
-            _model = Sam2Model.from_pretrained(SAM2_MODEL_ID, local_files_only=True).to("cpu").eval()
-            print(f"  SAM 2.1 loaded (offline cache) in {time.time() - t0:.1f}s")
         except (OSError, ValueError):
             # Cache miss — fall back to online download.
             _processor = Sam2Processor.from_pretrained(SAM2_MODEL_ID)
-            _model = Sam2Model.from_pretrained(SAM2_MODEL_ID).to("cpu").eval()
-            print(f"  SAM 2.1 loaded (online) in {time.time() - t0:.1f}s")
     return _model, _processor

 _model = None
 _processor = None
+_device: str = "cpu"
+def _select_device() -> str:
+    """Pick a torch device for SAM inference.
+    Returns ``"cuda"`` when a GPU is visible (HF ZeroGPU exposes CUDA even
+    at module import time via an emulation shim, so this picks the right
+    path both at startup and inside ``@spaces.GPU`` functions), otherwise
+    ``"cpu"``. torch is imported locally and any failure while probing it
+    falls back to ``"cpu"``, so CLI users without torch get the error from
+    the caller rather than from this helper.
+    """
+    try:
+        import torch
+        if torch.cuda.is_available():
+            return "cuda"
+    except Exception:
+        pass
+    return "cpu"
+def get_sam2_device() -> str:
+    """Return the device the SAM singleton was loaded on.
+    Callers use this to move their ``processor(..., return_tensors="pt")``
+    outputs onto the same device as the model before the forward pass.
+    Returns ``"cpu"`` before ``get_sam2()`` has been called.
+    """
+    return _device
 def get_sam2() -> Tuple[object, object]:
     the HEAD-request retry storm that happens when huggingface.co is slow or
     unreachable but the weights are already on disk. On a true cache miss we
     fall through to a normal online load.
+    The model is placed on the device returned by ``_select_device()``.
+    HF ZeroGPU docs recommend moving models to CUDA at module-level
+    startup for best performance, so callers in ZeroGPU Spaces should
+    invoke ``get_sam2()`` once at import time, before the first
+    ``@spaces.GPU``-wrapped request is served.
     """
+    global _model, _processor, _device
     if _model is None or _processor is None:
         from transformers import Sam2Model, Sam2Processor
+        _device = _select_device()
         t0 = time.time()
+        print(f"  Loading SAM 2.1 ({SAM2_MODEL_ID}) on {_device}...")
         try:
             _processor = Sam2Processor.from_pretrained(SAM2_MODEL_ID, local_files_only=True)
+            _model = Sam2Model.from_pretrained(SAM2_MODEL_ID, local_files_only=True).to(_device).eval()
+            print(f"  SAM 2.1 loaded (offline cache, {_device}) in {time.time() - t0:.1f}s")
         except (OSError, ValueError):
             # Cache miss — fall back to online download.
             _processor = Sam2Processor.from_pretrained(SAM2_MODEL_ID)
+            _model = Sam2Model.from_pretrained(SAM2_MODEL_ID).to(_device).eval()
+            print(f"  SAM 2.1 loaded (online, {_device}) in {time.time() - t0:.1f}s")
     return _model, _processor

src/sam_card_detection.py CHANGED Viewed

@@ -26,7 +26,7 @@ from .card_detection import (
     get_quad_dimensions,
     order_corners,
 )
-from .sam_backend import INFERENCE_MAX_SIDE as PROMPT_INFERENCE_MAX_SIDE, get_sam2
 # HF Hub model id — tiny, small, base-plus, large
 SAM2_MODEL_ID = "facebook/sam2.1-hiera-small"
@@ -531,6 +531,13 @@ def detect_credit_card_sam_prompt(
         input_labels=input_labels,
         return_tensors="pt",
     )
     with torch.inference_mode():
         # multimask_output=True gives 3 masks per seed (small / medium / large
         # disambiguation of the prompt). Empirically this matters for card
@@ -542,13 +549,13 @@ def detect_credit_card_sam_prompt(
     # Score masks in the scaled 1024-space. Only the single winner is
     # upscaled to full resolution afterward, which avoids O(N) 12 MP resizes.
-    scaled_h = inputs["original_sizes"][0][0].item()
-    scaled_w = inputs["original_sizes"][0][1].item()
     scaled_area = float(scaled_h * scaled_w)
     masks_list = processor.post_process_masks(
         outputs.pred_masks.cpu(),
-        inputs["original_sizes"],
         mask_threshold=0.0,
     )
     masks_tensor = masks_list[0]  # (num_prompts, num_candidates, H_s, W_s)

     get_quad_dimensions,
     order_corners,
 )
+from .sam_backend import INFERENCE_MAX_SIDE as PROMPT_INFERENCE_MAX_SIDE, get_sam2, get_sam2_device
 # HF Hub model id — tiny, small, base-plus, large
 SAM2_MODEL_ID = "facebook/sam2.1-hiera-small"
         input_labels=input_labels,
         return_tensors="pt",
     )
+    # `original_sizes` is used after the forward pass for mask post-processing
+    # and scale calculations. Pull it to CPU before moving `inputs` to the
+    # model device so downstream code never has to chase device placement.
+    original_sizes_cpu = inputs["original_sizes"].cpu() if hasattr(inputs["original_sizes"], "cpu") else inputs["original_sizes"]
+    device = get_sam2_device()
+    if device != "cpu":
+        inputs = inputs.to(device)
     with torch.inference_mode():
         # multimask_output=True gives 3 masks per seed (small / medium / large
         # disambiguation of the prompt). Empirically this matters for card
     # Score masks in the scaled 1024-space. Only the single winner is
     # upscaled to full resolution afterward, which avoids O(N) 12 MP resizes.
+    scaled_h = int(original_sizes_cpu[0][0].item())
+    scaled_w = int(original_sizes_cpu[0][1].item())
     scaled_area = float(scaled_h * scaled_w)
     masks_list = processor.post_process_masks(
         outputs.pred_masks.cpu(),
+        original_sizes_cpu,
         mask_threshold=0.0,
     )
     masks_tensor = masks_list[0]  # (num_prompts, num_candidates, H_s, W_s)

src/sam_hand_segmentation.py CHANGED Viewed

@@ -22,7 +22,7 @@ from typing import List, Optional, Tuple
 import cv2
 import numpy as np
-from .sam_backend import INFERENCE_MAX_SIDE, get_sam2
 def _downscale(image_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
@@ -87,6 +87,9 @@ def segment_hand_sam(
         input_labels=[[prompt_labels]],
         return_tensors="pt",
     )
     with torch.inference_mode():
         outputs = model(**inputs, multimask_output=True)

 import cv2
 import numpy as np
+from .sam_backend import INFERENCE_MAX_SIDE, get_sam2, get_sam2_device
 def _downscale(image_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
         input_labels=[[prompt_labels]],
         return_tensors="pt",
     )
+    device = get_sam2_device()
+    if device != "cpu":
+        inputs = inputs.to(device)
     with torch.inference_mode():
         outputs = model(**inputs, multimask_output=True)