Spaces:
Sleeping
Sleeping
| from pathlib import Path | |
| import numpy as np | |
| from src.entity.config_entity import MMPoseConfig, PhoneDetectorConfig | |
| from src.utils.logger import get_logger | |
| from src.utils.opencv_utils import crop_frame | |
class HandCropper:
    """
    Crop hand regions using pose keypoints.

    This is the cleaned version of the hand-cropping logic used in:
    - processing.py
    - yolo11_data_gather.py

    It supports:
    - left hand crop
    - right hand crop
    - merged hand crop when hands are close
    - primary/secondary hand selection based on distance to face
    """

    # When the person bbox is narrower than this fraction of the frame width,
    # the crop edge is derived from the bbox width (far_body_hand_ratio);
    # otherwise it is derived from the frame width (near_body_hand_ratio).
    NEAR_BODY_WIDTH_FRACTION: float = 0.6

    # Small constant added to denominators so a zero width/distance never
    # produces a division by zero.
    _EPS = np.finfo(np.float32).eps

    def __init__(
        self,
        mmpose_config: MMPoseConfig,
        phone_detector_config: PhoneDetectorConfig,
        log_dir: Path | None = None,
        log_level: str = "INFO",
    ) -> None:
        """
        Store the configuration objects and create a class-named logger.

        Args:
            mmpose_config: Pose config; ``keypoints`` must expose the
                ``*_index`` attributes read by ``_extract_points``.
            phone_detector_config: Detector config; ``hand_crop_logic`` must
                expose the ratios read by the cropping methods.
            log_dir: Forwarded to ``get_logger``.
            log_level: Forwarded to ``get_logger``.
        """
        self.mmpose_config = mmpose_config
        self.phone_detector_config = phone_detector_config
        self.logger = get_logger(
            self.__class__.__name__, log_dir=log_dir, level=log_level
        )

    @staticmethod
    def _bbox_width(xyxy: np.ndarray) -> float:
        """Return the absolute horizontal extent of an ``xyxy`` box."""
        return abs(xyxy[2] - xyxy[0])

    @staticmethod
    def _face_distance_ratio(left_distance: float, right_distance: float) -> float:
        """
        Return nearer-hand distance divided by farther-hand distance.

        The result is ~1.0 when both hands are equally far from the face and
        approaches 0.0 when one hand is much closer than the other.
        """
        # Explicit branches (rather than min/max) keep the ordering semantics
        # for non-finite distances identical to the comparison used to pick
        # the primary hand.
        if left_distance <= right_distance:
            nearer, farther = left_distance, right_distance
        else:
            nearer, farther = right_distance, left_distance
        return float(nearer / (farther + HandCropper._EPS))

    def _extract_points(self, keypoints: np.ndarray) -> dict:
        """
        Read important landmark points from one person keypoints array.

        Each row is truncated to its first two values (the coordinates used
        for all later geometry). Returns a dict keyed by landmark name.
        """
        kp_cfg = self.mmpose_config.keypoints
        landmark_indices = {
            "face_center": kp_cfg.face_center_index,
            "left_elbow": kp_cfg.left_elbow_index,
            "right_elbow": kp_cfg.right_elbow_index,
            "left_wrist": kp_cfg.left_wrist_index,
            "right_wrist": kp_cfg.right_wrist_index,
        }
        return {
            name: keypoints[index][:2] for name, index in landmark_indices.items()
        }

    def _compute_hand_hw(self, frame: np.ndarray, xyxy: np.ndarray) -> tuple[int, int]:
        """
        Decide hand crop size based on person bbox size.

        Returns a square ``(edge, edge)`` size. The edge scales with the bbox
        width for narrow boxes and with the frame width for wide boxes; the
        switch-over point is ``NEAR_BODY_WIDTH_FRACTION``.
        """
        crop_cfg = self.phone_detector_config.hand_crop_logic
        frame_width = frame.shape[1]
        bbox_width = self._bbox_width(xyxy)

        if bbox_width / (frame_width + self._EPS) < self.NEAR_BODY_WIDTH_FRACTION:
            edge = int(bbox_width * crop_cfg.far_body_hand_ratio)
        else:
            edge = int(frame_width * crop_cfg.near_body_hand_ratio)
        return edge, edge

    def compute_hand_centers(
        self, keypoints: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Estimate hand centers using elbow-to-wrist direction.

        Each center is the wrist position pushed further along the
        elbow-to-wrist vector by ``hand_extension_ratio``.

        Returns:
            ``(left_hand_center, right_hand_center)``.
        """
        points = self._extract_points(keypoints)
        extension_ratio = (
            self.phone_detector_config.hand_crop_logic.hand_extension_ratio
        )

        left_arm_vector = points["left_wrist"] - points["left_elbow"]
        right_arm_vector = points["right_wrist"] - points["right_elbow"]

        left_hand_center = points["left_wrist"] + left_arm_vector * extension_ratio
        right_hand_center = points["right_wrist"] + right_arm_vector * extension_ratio
        return left_hand_center, right_hand_center

    def crop_candidate_hands(
        self, frame: np.ndarray, keypoints: np.ndarray, xyxy: np.ndarray
    ) -> dict:
        """
        Generate candidate hand crops.

        Returns a dictionary that contains:
        - "left": left crop (None when the hands were merged)
        - "right": right crop (None when the hands were merged)
        - "merged": single crop around the midpoint if hands are close
        - "hand_hw": the crop size that was used

        Crop values are whatever ``crop_frame`` returns for the given center
        and size.
        """
        hand_hw = self._compute_hand_hw(frame, xyxy)
        left_hand_center, right_hand_center = self.compute_hand_centers(keypoints)

        result = {
            "left": None,
            "right": None,
            "merged": None,
            "hand_hw": hand_hw,
        }

        # Hands count as "close" when their distance does not exceed a
        # configured fraction of the person bbox width.
        merge_distance = (
            self.phone_detector_config.hand_crop_logic.merge_hand_distance_ratio
            * self._bbox_width(xyxy)
        )
        distance_between_hands = np.linalg.norm(left_hand_center - right_hand_center)

        if distance_between_hands > merge_distance:
            result["left"] = crop_frame(frame, left_hand_center, hand_hw)
            result["right"] = crop_frame(frame, right_hand_center, hand_hw)
        else:
            # Floor division is kept on purpose: the midpoint is snapped down
            # to whole coordinates before cropping.
            merged_center = (left_hand_center + right_hand_center) // 2
            result["merged"] = crop_frame(frame, merged_center, hand_hw)
        return result

    def get_priority_hand_crops(
        self,
        frame: np.ndarray,
        keypoints: np.ndarray,
        xyxy: np.ndarray,
    ) -> tuple[
        tuple[np.ndarray, list[int]] | None, tuple[np.ndarray, list[int]] | None, float
    ]:
        """
        Return:
        - primary hand crop
        - secondary hand crop
        - spare distance ratio

        Follows the current runtime logic: the hand whose wrist is closer to
        the face is chosen first (the right hand wins ties). When the hands
        were merged into a single crop, that crop is the primary, there is no
        secondary, and the ratio is 1.0. The ratio is otherwise the nearer
        wrist-to-face distance divided by the farther one.
        """
        hand_crops = self.crop_candidate_hands(frame, keypoints, xyxy)

        if hand_crops["merged"] is not None:
            return hand_crops["merged"], None, 1.0

        left_crop = hand_crops["left"]
        right_crop = hand_crops["right"]
        if left_crop is None and right_crop is None:
            return None, None, 1.0

        points = self._extract_points(keypoints)
        face_center = points["face_center"]
        left_face_distance = np.linalg.norm(points["left_wrist"] - face_center)
        right_face_distance = np.linalg.norm(points["right_wrist"] - face_center)

        # The denominator is epsilon-guarded, so no exception handling is
        # needed around the division.
        spare_ratio = self._face_distance_ratio(
            left_face_distance, right_face_distance
        )

        if left_face_distance < right_face_distance:
            return left_crop, right_crop, spare_ratio
        return right_crop, left_crop, spare_ratio