"""
Geometric computation utilities.

This module handles:
- Finger axis estimation (PCA and landmark-based)
- Ring-wearing zone localization
- Cross-section width measurement
- Coordinate transformations

All points and vectors are in image (x, y) pixel coordinates unless a
function states otherwise.
"""

import logging
from typing import Tuple, List, Optional, Dict, Any, Literal

import cv2
import numpy as np

from .geometry_constants import (
    MIN_LANDMARK_SPACING_PX,
    MIN_FINGER_LENGTH_PX,
    EPSILON,
    MIN_MASK_POINTS_FOR_PCA,
    ENDPOINT_SAMPLE_DISTANCE_FACTOR,
    DEFAULT_ZONE_START_PCT,
    DEFAULT_ZONE_END_PCT,
    ANATOMICAL_ZONE_WIDTH_FACTOR,
    MIN_DETERMINANT_FOR_INTERSECTION,
)

logger = logging.getLogger(__name__)

# Type for axis estimation method
AxisMethod = Literal["auto", "landmarks", "pca"]


def _validate_landmark_quality(landmarks: np.ndarray) -> Tuple[bool, str]:
    """
    Validate quality of finger landmarks for axis estimation.

    The checks run in order and the first failure is reported:
    presence/count, finiteness, minimum spacing between consecutive
    landmarks, minimum MCP-to-TIP length, and strictly increasing
    progression along the MCP-to-TIP direction.

    Args:
        landmarks: 4x2 array of finger landmarks [MCP, PIP, DIP, TIP]

    Returns:
        Tuple of (is_valid, reason). reason is "valid" on success, otherwise
        one of "landmarks_missing_or_incomplete",
        "landmarks_contain_invalid_values", "landmarks_too_close",
        "finger_too_short", "landmarks_not_monotonic".
    """
    if landmarks is None or len(landmarks) != 4:
        return False, "landmarks_missing_or_incomplete"

    # Check for NaN or infinite values
    if not np.all(np.isfinite(landmarks)):
        return False, "landmarks_contain_invalid_values"

    # Check reasonable spacing: consecutive landmarks must not be collapsed
    segment_lengths = [
        np.linalg.norm(nxt - cur)
        for cur, nxt in zip(landmarks[:-1], landmarks[1:])
    ]
    if min(segment_lengths) < MIN_LANDMARK_SPACING_PX:
        return False, "landmarks_too_close"

    # Overall direction from MCP to TIP
    overall_direction = landmarks[3] - landmarks[0]
    overall_length = np.linalg.norm(overall_direction)
    if overall_length < MIN_FINGER_LENGTH_PX:
        return False, "finger_too_short"
    overall_direction = overall_direction / overall_length

    # Project each landmark onto the overall direction; the projections must
    # be strictly increasing from MCP to TIP (no crossovers).
    projections = [
        np.dot(landmark - landmarks[0], overall_direction)
        for landmark in landmarks
    ]
    if any(nxt <= cur for cur, nxt in zip(projections, projections[1:])):
        return False, "landmarks_not_monotonic"

    return True, "valid"


def estimate_finger_axis_from_landmarks(
    landmarks: np.ndarray,
    method: str = "linear_fit"
) -> Dict[str, Any]:
    """
    Calculate finger axis directly from anatomical landmarks.

    Regardless of the direction method, the returned center and the
    visualization endpoints are derived from the PIP-DIP segment.

    Args:
        landmarks: 4x2 array of finger landmarks [MCP, PIP, DIP, TIP]
        method: Calculation method
            - "endpoints": MCP to TIP vector
            - "linear_fit": PIP to DIP vector (default)
            - "median_direction": Component-wise median of the 3 unit
              segment directions, re-normalized

    Returns:
        Dictionary containing:
        - center: Midpoint of PIP and DIP (x, y)
        - direction: Unit direction vector (dx, dy) computed by `method`
        - length: Full finger length in pixels (MCP to TIP, for reference)
        - palm_end: PIP extended toward the palm by half the PIP-DIP length
        - tip_end: DIP extended toward the tip by half the PIP-DIP length
        - method: Always "landmarks"

    Raises:
        ValueError: If the landmarks fail validation or `method` is unknown.
    """
    # Validate landmarks
    is_valid, reason = _validate_landmark_quality(landmarks)
    if not is_valid:
        raise ValueError(f"Invalid landmarks for axis estimation: {reason}")

    # Extract landmark positions
    mcp = landmarks[0]  # Metacarpophalangeal joint (knuckle, palm-side)
    pip = landmarks[1]  # Proximal interphalangeal joint
    dip = landmarks[2]  # Distal interphalangeal joint
    tip = landmarks[3]  # Fingertip

    # Calculate direction based on method
    if method == "endpoints":
        # Simple: vector from MCP to TIP
        direction = tip - mcp
        direction_length = np.linalg.norm(direction)
        direction = direction / direction_length

    elif method == "linear_fit":
        # Use only the PIP -> DIP segment.
        # NOTE(review): PIP and DIP bound the middle phalanx; the proximal
        # phalanx (the usual ring site) lies between MCP and PIP. The
        # PIP -> DIP segment is kept because it is what this implementation
        # has always used - confirm this is the intended segment.
        direction = dip - pip  # Vector from PIP to DIP
        direction_length = np.linalg.norm(direction)
        direction = direction / direction_length

        # Ensure direction points from palm to tip. Validation already
        # enforces this ordering, so the flip is only a safeguard.
        if np.dot(direction, tip - mcp) < 0:
            direction = -direction

    elif method == "median_direction":
        # Robust to outliers: median of the unit segment directions
        seg1_dir = (pip - mcp) / np.linalg.norm(pip - mcp)
        seg2_dir = (dip - pip) / np.linalg.norm(dip - pip)
        seg3_dir = (tip - dip) / np.linalg.norm(tip - dip)

        # Take median of each component, then re-normalize
        directions = np.array([seg1_dir, seg2_dir, seg3_dir])
        median_dir = np.median(directions, axis=0)
        direction = median_dir / np.linalg.norm(median_dir)

    else:
        raise ValueError(f"Unknown method: {method}. Use 'endpoints', 'linear_fit', or 'median_direction'")

    # Center at midpoint of PIP and DIP
    center = (pip + dip) / 2.0

    # Finger length still uses the full finger (for reference)
    length = np.linalg.norm(tip - mcp)

    # Visual endpoints: PIP and DIP, each extended by 50% of the segment
    # length along the axis for visualization clarity.
    segment_length = np.linalg.norm(dip - pip)
    extension_factor = 0.5
    palm_end = pip - direction * (segment_length * extension_factor)
    tip_end = dip + direction * (segment_length * extension_factor)

    return {
        "center": center.astype(np.float32),
        "direction": direction.astype(np.float32),
        "length": float(length),
        "palm_end": palm_end.astype(np.float32),
        "tip_end": tip_end.astype(np.float32),
        "method": "landmarks",
    }


def _estimate_axis_pca(
    mask: np.ndarray,
    landmarks: Optional[np.ndarray] = None,
) -> Dict[str, Any]:
    """
    Estimate finger axis using PCA on mask points.

    The principal eigenvector of the covariance of all non-zero mask pixels
    gives the axis. The palm/tip orientation comes from the MCP landmark when
    it is usable, otherwise from a "thinner end is the tip" heuristic.

    Args:
        mask: Binary finger mask (non-zero pixels belong to the finger)
        landmarks: Optional finger landmarks for orientation (4x2 array);
            only landmarks[0] (MCP) is used

    Returns:
        Dictionary containing axis data with method="pca"
        Keys: center, direction, length, palm_end, tip_end, method

    Raises:
        ValueError: If the mask has fewer than MIN_MASK_POINTS_FOR_PCA
            non-zero pixels.
    """
    # Get all non-zero points in the mask
    points = np.column_stack(np.where(mask > 0))  # Returns (row, col) i.e., (y, x)
    points = points[:, [1, 0]]  # Convert to (x, y) format

    if len(points) < MIN_MASK_POINTS_FOR_PCA:
        raise ValueError("Not enough points in mask for axis estimation")

    # Calculate center (centroid) and center the points
    center = np.mean(points, axis=0)
    centered = points - center

    # Principal axis is the eigenvector with the largest eigenvalue
    cov = np.cov(centered.T)
    eigenvalues, eigenvectors = np.linalg.eigh(cov)
    principal_idx = np.argmax(eigenvalues)
    direction = eigenvectors[:, principal_idx]

    # Ensure direction is a unit vector
    direction = direction / np.linalg.norm(direction)

    # Project all points onto the principal axis to find endpoints
    projections = np.dot(centered, direction)
    min_proj = np.min(projections)
    max_proj = np.max(projections)

    # Calculate finger length and endpoints along the axis
    length = max_proj - min_proj
    endpoint1 = center + direction * min_proj
    endpoint2 = center + direction * max_proj

    # Landmarks are only usable for orientation when the MCP point is
    # finite; with NaN/inf every distance comparison below would be False
    # and the axis would be flipped unconditionally.
    use_landmarks = (
        landmarks is not None
        and len(landmarks) == 4
        and bool(np.all(np.isfinite(landmarks[0])))
    )

    # Determine which endpoint is palm vs tip
    if use_landmarks:
        # landmarks[0] is MCP (palm side)
        base_point = landmarks[0]

        # The endpoint closer to the base is the palm end
        dist1_to_base = np.linalg.norm(endpoint1 - base_point)
        dist2_to_base = np.linalg.norm(endpoint2 - base_point)

        if dist1_to_base < dist2_to_base:
            palm_end = endpoint1
            tip_end = endpoint2
        else:
            palm_end = endpoint2
            tip_end = endpoint1
            direction = -direction  # Flip direction to point from palm to tip
    else:
        # Without usable landmarks, use heuristic: tip is usually thinner.
        # Sample points near each endpoint.
        sample_distance = length * ENDPOINT_SAMPLE_DISTANCE_FACTOR

        near_ep1 = points[np.abs(projections - min_proj) < sample_distance]
        near_ep2 = points[np.abs(projections - max_proj) < sample_distance]

        # Average distance from the axis at each end (proxy for thickness)
        if len(near_ep1) > 0 and len(near_ep2) > 0:
            perp_direction = np.array([-direction[1], direction[0]])
            dist1 = np.mean(np.abs(np.dot(near_ep1 - center, perp_direction)))
            dist2 = np.mean(np.abs(np.dot(near_ep2 - center, perp_direction)))

            # Thinner end is likely the tip
            if dist1 < dist2:
                palm_end = endpoint2
                tip_end = endpoint1
                direction = -direction
            else:
                palm_end = endpoint1
                tip_end = endpoint2
        else:
            # Fallback: assume endpoint2 is tip (positive direction)
            palm_end = endpoint1
            tip_end = endpoint2

    return {
        "center": center.astype(np.float32),
        "direction": direction.astype(np.float32),
        "length": float(length),
        "palm_end": palm_end.astype(np.float32),
        "tip_end": tip_end.astype(np.float32),
        "method": "pca",
    }


def estimate_finger_axis(
    mask: np.ndarray,
    landmarks: Optional[np.ndarray] = None,
    method: AxisMethod = "auto",
    landmark_method: str = "linear_fit",
) -> Dict[str, Any]:
    """
    Estimate the principal axis of a finger using landmarks (preferred) or PCA (fallback).

    Auto mode (default) uses landmarks when they are available and pass
    validation, and falls back to PCA otherwise.

    Args:
        mask: Binary finger mask
        landmarks: Optional finger landmarks (4x2 array: [MCP, PIP, DIP, TIP])
        method: Axis estimation method
            - "auto": Use landmarks if available and valid, else PCA (recommended)
            - "landmarks": Force landmark-based (fails if landmarks invalid)
            - "pca": Force PCA-based
        landmark_method: Method for landmark-based estimation
            ("endpoints", "linear_fit", "median_direction")

    Returns:
        Dictionary containing:
        - center: Axis center point (x, y)
        - direction: Unit direction vector (dx, dy) pointing from palm to tip
        - length: Estimated finger length in pixels
        - palm_end: Palm-side endpoint
        - tip_end: Fingertip endpoint
        - method: Method actually used ("landmarks" or "pca")

    Raises:
        ValueError: If `method` is unknown, if "landmarks" is forced without
            usable landmarks, or if the PCA path has too few mask points.
    """
    if method == "pca":
        # Force PCA method
        return _estimate_axis_pca(mask, landmarks)

    elif method == "landmarks":
        # Force landmark method (fail if landmarks invalid)
        if landmarks is None or len(landmarks) != 4:
            raise ValueError("Landmark method requested but landmarks not available")
        return estimate_finger_axis_from_landmarks(landmarks, method=landmark_method)

    elif method == "auto":
        # Auto mode: try landmarks first, fall back to PCA
        try:
            if landmarks is not None and len(landmarks) == 4:
                is_valid, reason = _validate_landmark_quality(landmarks)
                if is_valid:
                    logger.debug("Using landmark-based axis estimation (%s)", landmark_method)
                    return estimate_finger_axis_from_landmarks(landmarks, method=landmark_method)
                else:
                    logger.debug("Landmarks available but quality check failed: %s", reason)
                    logger.debug("Falling back to PCA axis estimation")
            else:
                logger.debug("Landmarks not available, using PCA axis estimation")
        except Exception as e:
            # Deliberate best-effort: any landmark failure degrades to PCA.
            logger.debug("Landmark-based axis estimation failed: %s", e)
            logger.debug("Falling back to PCA axis estimation")

        # Fall back to PCA
        return _estimate_axis_pca(mask, landmarks)

    else:
        raise ValueError(f"Unknown method: {method}. Use 'auto', 'landmarks', or 'pca'")


def localize_ring_zone(
    axis_data: Dict[str, Any],
    zone_start_pct: float = DEFAULT_ZONE_START_PCT,
    zone_end_pct: float = DEFAULT_ZONE_END_PCT,
) -> Dict[str, Any]:
    """
    Localize the ring-wearing zone along the finger axis.

    Both zone bounds are placed at `palm_end + direction * length * pct`.

    NOTE(review): axis_data produced by the landmark method reports `length`
    as the full MCP-to-TIP distance while `palm_end` is derived from the PIP
    joint, so the zone is not measured from the knuckle in that case -
    confirm this is intended.

    Args:
        axis_data: Output from estimate_finger_axis() containing
            direction, length, palm_end, tip_end
        zone_start_pct: Zone start as a fraction of the finger length measured
            from palm_end (default DEFAULT_ZONE_START_PCT)
        zone_end_pct: Zone end as a fraction of the finger length measured
            from palm_end (default DEFAULT_ZONE_END_PCT)

    Returns:
        Dictionary containing:
        - start_point: Zone start position (x, y)
        - end_point: Zone end position (x, y)
        - center_point: Zone center position (x, y)
        - length: Zone length in pixels
        - start_pct: Start fraction used
        - end_pct: End fraction used
        - localization_method: "percentage"
    """
    # Extract axis information
    palm_end = axis_data["palm_end"]
    direction = axis_data["direction"]
    finger_length = axis_data["length"]

    # Start at zone_start_pct from palm end
    start_distance = finger_length * zone_start_pct
    start_point = palm_end + direction * start_distance

    # End at zone_end_pct from palm end
    end_distance = finger_length * zone_end_pct
    end_point = palm_end + direction * end_distance

    # Zone center and length
    center_point = (start_point + end_point) / 2.0
    zone_length = end_distance - start_distance

    return {
        "start_point": start_point.astype(np.float32),
        "end_point": end_point.astype(np.float32),
        "center_point": center_point.astype(np.float32),
        "length": float(zone_length),
        "start_pct": zone_start_pct,
        "end_pct": zone_end_pct,
        "localization_method": "percentage",
    }


def localize_ring_zone_from_landmarks(
    landmarks: np.ndarray,
    axis_data: Dict[str, Any],
    zone_type: str = "percentage",
    zone_start_pct: float = DEFAULT_ZONE_START_PCT,
    zone_end_pct: float = DEFAULT_ZONE_END_PCT,
) -> Dict[str, Any]:
    """
    Localize ring-wearing zone using anatomical landmarks.

    Provides an anatomical alternative to the percentage-based approach.

    Args:
        landmarks: 4x2 array of finger landmarks [MCP, PIP, DIP, TIP];
            only read in "anatomical" mode
        axis_data: Output from estimate_finger_axis(); only read in
            "percentage" mode
        zone_type: Zone localization method
            - "percentage": Delegates to localize_ring_zone() (default)
            - "anatomical": Spans from PIP toward the palm by one PIP-DIP
              segment length
        zone_start_pct: Zone start fraction (percentage mode only)
        zone_end_pct: Zone end fraction (percentage mode only)

    Returns:
        Dictionary containing:
        - start_point: Zone start position (x, y)
        - end_point: Zone end position (x, y)
        - center_point: Zone center position (x, y)
        - length: Zone length in pixels
        - localization_method: "percentage" or "anatomical"
        Percentage mode additionally includes start_pct and end_pct.

    Raises:
        ValueError: If `zone_type` is unknown, or if "anatomical" is
            requested without 4 landmarks.
    """
    if zone_type == "percentage":
        # Use percentage-based method
        return localize_ring_zone(axis_data, zone_start_pct, zone_end_pct)

    elif zone_type == "anatomical":
        if landmarks is None or len(landmarks) != 4:
            raise ValueError("Anatomical zone requested but landmarks not available")

        # Anatomical mode: approximate the proximal phalanx.
        # Upper bound: PIP joint (toward fingertip)
        # Lower bound: PIP - (DIP - PIP), i.e. the PIP-DIP segment mirrored
        # about PIP toward the palm.
        pip = landmarks[1]
        dip = landmarks[2]

        segment_vector = dip - pip  # Vector from PIP to DIP

        end_point = pip.copy()  # Upper bound at PIP
        start_point = pip - segment_vector  # Lower bound one segment below PIP

        # Calculate zone center and length
        center_point = (start_point + end_point) / 2.0
        zone_length = np.linalg.norm(end_point - start_point)

        return {
            "start_point": start_point.astype(np.float32),
            "end_point": end_point.astype(np.float32),
            "center_point": center_point.astype(np.float32),
            "length": float(zone_length),
            "localization_method": "anatomical",
        }

    else:
        raise ValueError(f"Unknown zone_type: {zone_type}. Use 'percentage' or 'anatomical'")


def compute_cross_section_width(
    contour: np.ndarray,
    axis_data: Dict[str, Any],
    zone_data: Dict[str, Any],
    num_samples: int = 20,
) -> Dict[str, Any]:
    """
    Measure finger width by sampling cross-sections perpendicular to axis.

    Sample centers are spaced evenly from the zone start to the zone end
    (a single sample is taken at the zone midpoint). At each one, a line
    perpendicular to the axis is intersected with the contour and the
    largest distance between any two intersection points is the width.

    Args:
        contour: Finger contour points (Nx2 array in x,y format)
        axis_data: Output from estimate_finger_axis(); only "direction"
            is read
        zone_data: Output from localize_ring_zone(); "start_point" and
            "end_point" are read
        num_samples: Number of cross-section samples (default 20)

    Returns:
        Dictionary containing:
        - widths_px: List of width measurements in pixels
        - sample_points: List of intersection point pairs, one per
          measurement, in the order the contour edges were traversed
        - median_width_px: Median width in pixels
        - std_width_px: Standard deviation of widths
        - mean_width_px: Mean width in pixels
        - num_samples: Actual number of successful measurements

    Raises:
        ValueError: If no sample produced a valid width.
    """
    direction = axis_data["direction"]
    start_point = zone_data["start_point"]
    end_point = zone_data["end_point"]

    # Perpendicular direction (rotate 90 degrees)
    perp_direction = np.array([-direction[1], direction[0]], dtype=np.float32)

    widths = []
    sample_points_list = []

    # Generate sample points along the zone
    for i in range(num_samples):
        # Interpolate between start and end
        t = i / (num_samples - 1) if num_samples > 1 else 0.5
        sample_center = start_point + t * (end_point - start_point)

        # Find intersections with contour along perpendicular line
        intersections = line_contour_intersections(
            contour, sample_center, perp_direction
        )

        if len(intersections) >= 2:
            pts = np.array(intersections)

            # Find the two points that are farthest apart. This handles
            # lines that cross the contour more than twice. Pairs at zero
            # distance (duplicate hits on a shared vertex) never qualify.
            max_dist = 0
            best_pair = None

            for j in range(len(pts)):
                for k in range(j + 1, len(pts)):
                    dist = np.linalg.norm(pts[j] - pts[k])
                    if dist > max_dist:
                        max_dist = dist
                        best_pair = (pts[j], pts[k])

            if best_pair is not None:
                widths.append(max_dist)
                sample_points_list.append(best_pair)

    if len(widths) == 0:
        raise ValueError("No valid width measurements found in ring zone")

    widths = np.array(widths)

    # Calculate statistics
    median_width = float(np.median(widths))
    mean_width = float(np.mean(widths))
    std_width = float(np.std(widths))

    return {
        "widths_px": widths.tolist(),
        "sample_points": sample_points_list,
        "median_width_px": median_width,
        "mean_width_px": mean_width,
        "std_width_px": std_width,
        "num_samples": len(widths),
    }


def line_contour_intersections(
    contour: np.ndarray,
    point: Tuple[float, float],
    direction: Tuple[float, float],
) -> List[Tuple[float, float]]:
    """
    Find intersection points between a line and a contour.

    Uses parametric line-segment intersection to find where an infinite
    line intersects the contour edges. The contour is treated as closed
    (the last point connects back to the first). A line passing exactly
    through a vertex can be reported once for each adjoining edge.

    Args:
        contour: Contour points (Nx2 array in x,y format)
        point: A point on the line (x, y)
        direction: Line direction vector (dx, dy), will be normalized

    Returns:
        List of intersection points as (x, y) tuples
    """
    intersections = []

    # Normalize direction; EPSILON guards against a zero-length vector
    direction = np.array(direction, dtype=np.float32)
    direction = direction / (np.linalg.norm(direction) + EPSILON)
    point = np.array(point, dtype=np.float32)

    # Check each edge of the contour
    n = len(contour)
    for i in range(n):
        p1 = contour[i]
        p2 = contour[(i + 1) % n]

        # Line:    P = point + t * direction
        # Segment: Q = p1 + s * (p2 - p1), where s in [0, 1]
        edge_vec = p2 - p1

        # Solve: point + t * direction = p1 + s * edge_vec
        # i.e.   [direction, -edge_vec] @ [t, s]^T = p1 - point
        A = np.column_stack([direction, -edge_vec])
        b = p1 - point

        # Skip (near-)parallel edges, where the matrix is singular
        det = A[0, 0] * A[1, 1] - A[0, 1] * A[1, 0]
        if abs(det) < MIN_DETERMINANT_FOR_INTERSECTION:
            continue

        # Solve for t and s
        try:
            params = np.linalg.solve(A, b)
            t, s = params[0], params[1]

            # Keep the hit only if it lies on the edge segment
            if 0 <= s <= 1:
                intersection = point + t * direction
                intersections.append(tuple(intersection))
        except np.linalg.LinAlgError:
            continue

    return intersections


# ============================================================================
# Precise Image Rotation for Finger Alignment
# ============================================================================

def calculate_angle_from_vertical(direction: np.ndarray) -> float:
    """
    Calculate the rotation needed to align a direction vector to vertical (upward).

    In image coordinates, vertical upward is (0, -1) in (x, y) format.

    Args:
        direction: Unit direction vector (dx, dy) in (x, y) format

    Returns:
        Rotation angle in degrees to apply to align direction to vertical.
        Positive = need to rotate counter-clockwise (CCW) in image
        coordinates, which matches the angle convention of
        cv2.getRotationMatrix2D.
        Range: [-180, 180]
    """
    # Vertical upward in image coordinates: (0, -1)
    vertical = np.array([0.0, -1.0])

    # Calculate angle using atan2(cross_product, dot_product)
    # cross = dx * (-1) - dy * 0 = -dx
    # dot = dx * 0 + dy * (-1) = -dy
    cross = direction[0] * vertical[1] - direction[1] * vertical[0]
    dot = np.dot(direction, vertical)

    angle_rad = np.arctan2(cross, dot)
    angle_deg = np.degrees(angle_rad)

    # atan2 gives the signed angle from `direction` to vertical in raw (x, y)
    # coordinates, where positive is visually clockwise because the image
    # y axis points down. The returned value uses the opposite (CCW-positive)
    # convention, so the sign is flipped: a finger leaning 10 degrees to the
    # right of vertical yields +10.
    return -angle_deg


def rotate_image_precise(
    image: np.ndarray,
    angle_degrees: float,
    center: Optional[Tuple[float, float]] = None
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Rotate image by a precise angle around a center point.

    Args:
        image: Input image (grayscale or BGR)
        angle_degrees: Rotation angle in degrees (positive =
            counter-clockwise, as defined by cv2.getRotationMatrix2D)
        center: Rotation center (x, y). If None, uses image center.

    Returns:
        Tuple of:
        - rotated_image: Rotated image (same size as input; uncovered
          border pixels are filled with 0)
        - rotation_matrix: 2x3 affine transformation matrix
    """
    h, w = image.shape[:2]

    if center is None:
        center = (w / 2.0, h / 2.0)

    # Get rotation matrix (OpenCV uses counter-clockwise positive)
    rotation_matrix = cv2.getRotationMatrix2D(center, angle_degrees, scale=1.0)

    # Apply rotation
    rotated = cv2.warpAffine(
        image,
        rotation_matrix,
        (w, h),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=0
    )

    return rotated, rotation_matrix


def transform_points_rotation(
    points: np.ndarray,
    rotation_matrix: np.ndarray
) -> np.ndarray:
    """
    Transform points using a rotation matrix from cv2.getRotationMatrix2D.

    Args:
        points: Nx2 array of points in (x, y) format
        rotation_matrix: 2x3 affine transformation matrix from cv2.getRotationMatrix2D

    Returns:
        Nx2 float32 array of transformed points in (x, y) format
    """
    # Add homogeneous coordinate (1) to each point: (x, y) -> (x, y, 1)
    n_points = points.shape[0]
    homogeneous = np.hstack([points, np.ones((n_points, 1))])

    # Apply transformation: [2x3] @ [3xN] -> [2xN], transposed back to Nx2
    transformed = (rotation_matrix @ homogeneous.T).T

    return transformed.astype(np.float32)


def rotate_axis_data(
    axis_data: Dict[str, Any],
    rotation_matrix: np.ndarray
) -> Dict[str, Any]:
    """
    Update axis data after image rotation.

    The input dictionary is shallow-copied; keys that are not transformed
    (for example length and method) are carried over unchanged.

    Args:
        axis_data: Axis data dictionary with center, direction, and
            optionally palm_end, tip_end
        rotation_matrix: 2x3 rotation matrix

    Returns:
        Updated axis data with transformed coordinates
    """
    rotated = axis_data.copy()

    # Transform center point
    center = axis_data["center"].reshape(1, 2)
    rotated["center"] = transform_points_rotation(center, rotation_matrix)[0]

    # Direction is a vector, not a point: apply only the 2x2 rotation part
    # of the matrix (no translation), then re-normalize.
    rotation_only = rotation_matrix[:2, :2]
    direction = axis_data["direction"].reshape(2, 1)
    rotated_direction = (rotation_only @ direction).flatten()
    rotated["direction"] = rotated_direction / np.linalg.norm(rotated_direction)

    # Transform endpoints if they exist
    if "palm_end" in axis_data:
        palm_end = axis_data["palm_end"].reshape(1, 2)
        rotated["palm_end"] = transform_points_rotation(palm_end, rotation_matrix)[0]

    if "tip_end" in axis_data:
        tip_end = axis_data["tip_end"].reshape(1, 2)
        rotated["tip_end"] = transform_points_rotation(tip_end, rotation_matrix)[0]

    return rotated


def rotate_contour(
    contour: np.ndarray,
    rotation_matrix: np.ndarray
) -> np.ndarray:
    """
    Rotate a contour using rotation matrix.

    Args:
        contour: Nx2 array of contour points in (x, y) format
        rotation_matrix: 2x3 rotation matrix

    Returns:
        Rotated contour as an Nx2 float32 array
    """
    return transform_points_rotation(contour, rotation_matrix)