ring-sizer / src /geometry.py
feng-x's picture
Upload folder using huggingface_hub
6f3fe10 verified
raw
history blame
17 kB
"""
Geometric computation utilities.
This module handles:
- Finger axis estimation from MediaPipe landmarks
- Ring-wearing zone localization
- Coordinate transformations (precise rotation to canonical frame)
"""
import logging
import cv2
import numpy as np
from typing import Tuple, Optional, Dict, Any
from .geometry_constants import (
MIN_LANDMARK_SPACING_PX,
MIN_FINGER_LENGTH_PX,
DEFAULT_ZONE_START_PCT,
DEFAULT_ZONE_END_PCT,
ANATOMICAL_ZONE_WIDTH_FACTOR,
)
logger = logging.getLogger(__name__)
def _validate_landmark_quality(landmarks: np.ndarray) -> Tuple[bool, str]:
    """
    Decide whether four finger landmarks are usable for axis estimation.

    The checks run in this order and the first failure is reported:
      1. exactly four landmarks are present,
      2. every coordinate is finite,
      3. each consecutive pair is at least MIN_LANDMARK_SPACING_PX apart,
      4. MCP and TIP are at least MIN_FINGER_LENGTH_PX apart,
      5. projections onto the MCP->TIP direction strictly increase.

    Args:
        landmarks: 4x2 array of finger landmarks [MCP, PIP, DIP, TIP]

    Returns:
        Tuple of (is_valid, reason). ``reason`` is "valid" on success,
        otherwise a short code naming the failed check.
    """
    if landmarks is None or len(landmarks) != 4:
        return False, "landmarks_missing_or_incomplete"

    # NaN / inf anywhere makes every later distance meaningless.
    if not np.isfinite(landmarks).all():
        return False, "landmarks_contain_invalid_values"

    # Collapsed landmarks: the smallest joint-to-joint gap must clear the floor.
    gaps = [
        np.linalg.norm(nxt - cur)
        for cur, nxt in zip(landmarks[:-1], landmarks[1:])
    ]
    if min(gaps) < MIN_LANDMARK_SPACING_PX:
        return False, "landmarks_too_close"

    # Overall MCP -> TIP span; also used as the reference direction below.
    mcp = landmarks[0]
    span = landmarks[3] - mcp
    span_length = np.linalg.norm(span)
    if span_length < MIN_FINGER_LENGTH_PX:
        return False, "finger_too_short"
    unit_span = span / span_length

    # Crossovers: positions along the span must strictly increase MCP -> TIP.
    along = [np.dot(point - mcp, unit_span) for point in landmarks]
    if any(later <= earlier for earlier, later in zip(along, along[1:])):
        return False, "landmarks_not_monotonic"

    return True, "valid"
def estimate_finger_axis_from_landmarks(
    landmarks: np.ndarray,
    method: str = "linear_fit"
) -> Dict[str, Any]:
    """
    Build a finger-axis description directly from the four joint landmarks.

    Whatever ``method`` is chosen, the axis is anchored on the proximal
    phalanx (MCP-PIP, the segment a ring sits on): ``center`` is its
    midpoint and the two visualization endpoints extend it by 50% of its
    length on each side. ``method`` only selects how the direction vector
    is computed.

    Args:
        landmarks: 4x2 array of finger landmarks [MCP, PIP, DIP, TIP]
        method: How to compute the direction
            - "endpoints": normalized MCP -> TIP vector (legacy)
            - "linear_fit": normalized MCP -> PIP vector (DEFAULT). Despite
              the name, no fitting is performed.
            - "median_direction": component-wise median of the three unit
              segment directions, re-normalized

    Returns:
        Dictionary containing:
        - center: Midpoint of MCP and PIP (x, y), float32
        - direction: Unit direction vector (dx, dy) pointing palm -> tip, float32
        - length: MCP-to-TIP distance in pixels (full finger, for reference)
        - palm_end: MCP moved back along the direction by half the MCP-PIP length
        - tip_end: PIP moved forward along the direction by half the MCP-PIP length
        - method: Always the literal "landmarks" (not the ``method`` argument)

    Raises:
        ValueError: If the landmarks fail validation, or ``method`` is unknown.
    """
    ok, why = _validate_landmark_quality(landmarks)
    if not ok:
        raise ValueError(f"Invalid landmarks for axis estimation: {why}")

    # Joints in palm -> tip order.
    mcp, pip, dip, tip = landmarks[0], landmarks[1], landmarks[2], landmarks[3]
    proximal = pip - mcp  # proximal phalanx, the ring-bearing bone

    if method == "endpoints":
        whole_finger = tip - mcp
        direction = whole_finger / np.linalg.norm(whole_finger)
    elif method == "linear_fit":
        direction = proximal / np.linalg.norm(proximal)
        # Defensive: keep the vector pointing palm -> tip even if the
        # landmarks were supplied in a swapped order.
        if np.dot(direction, tip - mcp) < 0:
            direction = -direction
    elif method == "median_direction":
        unit_segments = np.array([
            (far - near) / np.linalg.norm(far - near)
            for near, far in ((mcp, pip), (pip, dip), (dip, tip))
        ])
        # The median is taken per component, so the result is generally not
        # unit length and must be normalized again.
        componentwise_median = np.median(unit_segments, axis=0)
        direction = componentwise_median / np.linalg.norm(componentwise_median)
    else:
        raise ValueError(f"Unknown method: {method}. Use 'endpoints', 'linear_fit', or 'median_direction'")

    # Visualization endpoints overshoot each joint by 50% of the proximal
    # phalanx length, measured along the chosen direction.
    overhang = np.linalg.norm(proximal) * 0.5

    return {
        "center": ((mcp + pip) / 2.0).astype(np.float32),
        "direction": direction.astype(np.float32),
        "length": float(np.linalg.norm(tip - mcp)),
        "palm_end": (mcp - direction * overhang).astype(np.float32),
        "tip_end": (pip + direction * overhang).astype(np.float32),
        "method": "landmarks",
    }
def estimate_finger_axis(
    landmarks: np.ndarray,
    landmark_method: str = "linear_fit",
) -> Dict[str, Any]:
    """Estimate the finger axis from MediaPipe finger landmarks.

    Thin entry point that forwards to
    :func:`estimate_finger_axis_from_landmarks` with ``method=landmark_method``.

    Args:
        landmarks: 4x2 array of finger landmarks [MCP, PIP, DIP, TIP]
        landmark_method: Direction method passed through unchanged.

    Returns:
        The axis dictionary produced by
        :func:`estimate_finger_axis_from_landmarks`.

    Raises:
        ValueError: Propagated from the delegate when the landmarks are
            missing, non-finite, too close together, too short, or
            non-monotonic, or when ``landmark_method`` is not recognised.
            Callers should treat that as a measurement failure with
            ``fail_reason="axis_estimation_failed"``.
    """
    axis = estimate_finger_axis_from_landmarks(landmarks, method=landmark_method)
    return axis
def localize_ring_zone(
    axis_data: Dict[str, Any],
    zone_start_pct: float = DEFAULT_ZONE_START_PCT,
    zone_end_pct: float = DEFAULT_ZONE_END_PCT,
) -> Dict[str, Any]:
    """
    Localize the ring-wearing zone along the finger axis.

    Both zone bounds are placed at ``axis_data["length"] * pct`` pixels from
    ``axis_data["palm_end"]``, measured along ``axis_data["direction"]``.

    Args:
        axis_data: Output from estimate_finger_axis(). Only the keys
            ``palm_end``, ``direction`` and ``length`` are read.
        zone_start_pct: Zone start as a fraction of the finger length
            (e.g. 0.15 for 15%). Defaults to DEFAULT_ZONE_START_PCT.
        zone_end_pct: Zone end as a fraction of the finger length
            (e.g. 0.25 for 25%). Defaults to DEFAULT_ZONE_END_PCT.

    Returns:
        Dictionary containing:
        - start_point: Zone start position (x, y), float32
        - end_point: Zone end position (x, y), float32
        - center_point: Midpoint of start and end (x, y), float32
        - length: Zone length in pixels (end distance minus start distance;
          negative if ``zone_end_pct < zone_start_pct``)
        - start_pct: Start fraction used
        - end_pct: End fraction used
        - localization_method: "percentage"

    Raises:
        KeyError: If ``axis_data`` lacks ``palm_end``, ``direction`` or ``length``.
    """
    # NOTE(review): as produced by estimate_finger_axis_from_landmarks,
    # `palm_end` is a visualization endpoint lying half an MCP-PIP length
    # behind the MCP joint, so the fractions below are offsets from that
    # point rather than from the knuckle itself. Confirm this anchor is
    # the intended one.
    anchor = axis_data["palm_end"]
    direction = axis_data["direction"]
    finger_length = axis_data["length"]

    # Offsets along the axis, in pixels, for the two zone bounds.
    start_distance = finger_length * zone_start_pct
    end_distance = finger_length * zone_end_pct

    start_point = anchor + direction * start_distance
    end_point = anchor + direction * end_distance
    center_point = (start_point + end_point) / 2.0

    return {
        "start_point": start_point.astype(np.float32),
        "end_point": end_point.astype(np.float32),
        "center_point": center_point.astype(np.float32),
        "length": float(end_distance - start_distance),
        "start_pct": zone_start_pct,
        "end_pct": zone_end_pct,
        "localization_method": "percentage",
    }
def localize_ring_zone_from_landmarks(
    landmarks: np.ndarray,
    axis_data: Dict[str, Any],
    zone_type: str = "percentage",
    zone_start_pct: float = DEFAULT_ZONE_START_PCT,
    zone_end_pct: float = DEFAULT_ZONE_END_PCT,
) -> Dict[str, Any]:
    """
    Localize the ring-wearing zone, either by percentage or from landmarks.

    Args:
        landmarks: 4x2 array of finger landmarks [MCP, PIP, DIP, TIP].
            Only read in "anatomical" mode (PIP and DIP).
        axis_data: Output from estimate_finger_axis(). Only used in
            "percentage" mode, where it is passed to localize_ring_zone().
        zone_type: Zone localization method
            - "percentage": delegate to localize_ring_zone() (default)
            - "anatomical": zone runs from PIP toward the palm for one
              PIP-DIP segment length; PIP is its upper (tip-side) bound
        zone_start_pct: Zone start fraction (percentage mode only)
        zone_end_pct: Zone end fraction (percentage mode only)

    Returns:
        Dictionary containing:
        - start_point: Zone start position (x, y), palm side
        - end_point: Zone end position (x, y), tip side
        - center_point: Zone center position (x, y)
        - length: Zone length in pixels
        - localization_method: "percentage" or "anatomical"
        Percentage mode additionally carries whatever localize_ring_zone()
        returns (start_pct, end_pct).

    Raises:
        ValueError: If ``zone_type`` is neither "percentage" nor "anatomical".
    """
    if zone_type == "percentage":
        return localize_ring_zone(axis_data, zone_start_pct, zone_end_pct)

    if zone_type != "anatomical":
        raise ValueError(f"Unknown zone_type: {zone_type}. Use 'percentage' or 'anatomical'")

    pip_joint = landmarks[1]
    middle_phalanx = landmarks[2] - pip_joint  # PIP -> DIP vector

    # The zone is the PIP -> DIP vector mirrored through PIP: it ends at PIP
    # and starts one middle-phalanx length further toward the palm.
    # NOTE(review): this follows the line of the middle phalanx, not the
    # MCP -> PIP bone, so the two differ when the finger is bent at PIP.
    # Confirm that approximation is acceptable.
    upper_bound = pip_joint.copy()
    lower_bound = pip_joint - middle_phalanx

    midpoint = (lower_bound + upper_bound) / 2.0
    span_px = np.linalg.norm(upper_bound - lower_bound)

    return {
        "start_point": lower_bound.astype(np.float32),
        "end_point": upper_bound.astype(np.float32),
        "center_point": midpoint.astype(np.float32),
        "length": float(span_px),
        "localization_method": "anatomical",
    }
# ============================================================================
# Precise Image Rotation for Finger Alignment
# ============================================================================
def calculate_angle_from_vertical(direction: np.ndarray) -> float:
    """
    Return the rotation that brings a direction vector to vertical (upward).

    "Up" in image coordinates is (0, -1) because the y axis points down.

    Args:
        direction: Direction vector (dx, dy) in (x, y) format

    Returns:
        Rotation angle in degrees that aligns ``direction`` with image-up.
        Positive means the image must be turned counter-clockwise as
        displayed, e.g. a vector pointing right (1, 0) yields +90.
        Range: [-180, 180]
    """
    up = np.array([0.0, -1.0])
    dx, dy = direction[0], direction[1]

    # 2-D cross and dot products of `direction` with `up`; with up = (0, -1)
    # these reduce to -dx and -dy respectively.
    sine_term = dx * up[1] - dy * up[0]
    cosine_term = np.dot(direction, up)

    # atan2 gives the signed angle from `direction` to `up` in the y-down
    # image frame, where positive values are clockwise on screen. Flipping
    # the sign yields the counter-clockwise-positive convention documented
    # above.
    return -np.degrees(np.arctan2(sine_term, cosine_term))
def rotate_image_precise(
    image: np.ndarray,
    angle_degrees: float,
    center: Optional[Tuple[float, float]] = None
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Rotate image by a precise angle around a center point.

    Args:
        image: Input image (grayscale or BGR)
        angle_degrees: Rotation angle in degrees. Following
            cv2.getRotationMatrix2D, positive values rotate the image
            counter-clockwise as displayed (origin at the top-left corner).
        center: Rotation center (x, y). If None, uses image center.
            Any two-element sequence is accepted, including NumPy arrays.

    Returns:
        Tuple of:
        - rotated_image: Rotated image, same width and height as the input;
          content rotated outside the frame is cropped and uncovered pixels
          are filled with 0
        - rotation_matrix: 2x3 affine transformation matrix
    """
    height, width = image.shape[:2]

    if center is None:
        pivot = (width / 2.0, height / 2.0)
    else:
        # Normalize to built-in floats: coordinates taken from NumPy arrays
        # (e.g. float32 axis data) are NumPy scalars, which not every OpenCV
        # Python build accepts for a point argument.
        pivot_x, pivot_y = center
        pivot = (float(pivot_x), float(pivot_y))

    # OpenCV convention: positive angle = counter-clockwise rotation.
    rotation_matrix = cv2.getRotationMatrix2D(pivot, float(angle_degrees), scale=1.0)

    # Output canvas keeps the input size; bilinear sampling, black border.
    rotated = cv2.warpAffine(
        image, rotation_matrix, (width, height),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=0
    )
    return rotated, rotation_matrix
def transform_points_rotation(
    points: np.ndarray,
    rotation_matrix: np.ndarray
) -> np.ndarray:
    """
    Apply a 2x3 affine matrix (as built by cv2.getRotationMatrix2D) to points.

    Args:
        points: Nx2 array of points in (x, y) format
        rotation_matrix: 2x3 affine transformation matrix

    Returns:
        Nx2 float32 array of transformed points in (x, y) format
    """
    # Lift (x, y) to (x, y, 1) so the matrix's translation column is applied
    # by the same multiplication as its rotation part.
    ones_column = np.ones((points.shape[0], 1))
    augmented = np.hstack([points, ones_column])

    # [2x3] @ [3xN] -> [2xN], transposed back to one point per row.
    mapped = (rotation_matrix @ augmented.T).T
    return mapped.astype(np.float32)
def rotate_axis_data(
    axis_data: Dict[str, Any],
    rotation_matrix: np.ndarray
) -> Dict[str, Any]:
    """
    Re-express axis data in the frame of a rotated image.

    Args:
        axis_data: Axis data dictionary. ``center`` and ``direction`` are
            required; ``palm_end`` and ``tip_end`` are transformed when present.
        rotation_matrix: 2x3 rotation matrix

    Returns:
        Shallow copy of ``axis_data`` with the point entries mapped through
        the full matrix and ``direction`` rotated and re-normalized. Other
        entries are carried over unchanged.
    """
    updated = axis_data.copy()

    def _moved(key: str) -> np.ndarray:
        # Points take the full affine map (rotation and translation).
        as_row = axis_data[key].reshape(1, 2)
        return transform_points_rotation(as_row, rotation_matrix)[0]

    updated["center"] = _moved("center")

    # A direction is a free vector: apply only the 2x2 part (no translation)
    # and restore unit length afterwards.
    linear_part = rotation_matrix[:2, :2]
    turned = (linear_part @ axis_data["direction"].reshape(2, 1)).flatten()
    updated["direction"] = turned / np.linalg.norm(turned)

    # Endpoints are optional in the input dictionary.
    for key in ("palm_end", "tip_end"):
        if key in axis_data:
            updated[key] = _moved(key)

    return updated
def rotate_contour(
    contour: np.ndarray,
    rotation_matrix: np.ndarray
) -> np.ndarray:
    """
    Map every point of a contour through a rotation matrix.

    Delegates to transform_points_rotation(); the contour is treated as a
    plain list of points.

    Args:
        contour: Nx2 array of contour points in (x, y) format
        rotation_matrix: 2x3 rotation matrix

    Returns:
        Rotated contour as returned by transform_points_rotation()
    """
    rotated_points = transform_points_rotation(contour, rotation_matrix)
    return rotated_points