pvs_backend / src /components /hand_cropper.py
adnankhan-11's picture
PVD System - Initial deployment
d2885a7
from pathlib import Path
import numpy as np
from src.entity.config_entity import MMPoseConfig, PhoneDetectorConfig
from src.utils.logger import get_logger
from src.utils.opencv_utils import crop_frame
class HandCropper:
"""
Crop hand regions using pose keypoints.
This is the cleaned version of the hand-cropping logic used in:
- processing.py
- yolo11_data_gather.py
It supports:
- left hand crop
- right hand crop
- merged hand crop when hands are close
- primary/secondary hand selection based on distance to face
"""
def __init__(
self,
mmpose_config: MMPoseConfig,
phone_detector_config: PhoneDetectorConfig,
log_dir: Path | None = None,
log_level: str = "INFO",
) -> None:
self.mmpose_config = mmpose_config
self.phone_detector_config = phone_detector_config
self.logger = get_logger(
self.__class__.__name__, log_dir=log_dir, level=log_level
)
def _extract_points(self, keypoints: np.ndarray) -> dict:
"""
Read important landmark points from one person keypoints array.
"""
kp_cfg = self.mmpose_config.keypoints
return {
"face_center": keypoints[kp_cfg.face_center_index][:2],
"left_elbow": keypoints[kp_cfg.left_elbow_index][:2],
"right_elbow": keypoints[kp_cfg.right_elbow_index][:2],
"left_wrist": keypoints[kp_cfg.left_wrist_index][:2],
"right_wrist": keypoints[kp_cfg.right_wrist_index][:2],
}
def _compute_hand_hw(self, frame: np.ndarray, xyxy: np.ndarray) -> tuple[int, int]:
"""
Decide hand crop size based on person bbox size.
"""
bbox_width = abs(xyxy[2] - xyxy[0])
if bbox_width / (frame.shape[1] + np.finfo(np.float32).eps) < 0.6:
edge = int(
bbox_width
* self.phone_detector_config.hand_crop_logic.far_body_hand_ratio
)
else:
edge = int(
frame.shape[1]
* self.phone_detector_config.hand_crop_logic.near_body_hand_ratio
)
return edge, edge
def compute_hand_centers(
self, keypoints: np.ndarray
) -> tuple[np.ndarray, np.ndarray]:
"""
Estimate hand centers using elbow-to-wrist direction.
"""
points = self._extract_points(keypoints)
left_arm_vector = points["left_wrist"] - points["left_elbow"]
right_arm_vector = points["right_wrist"] - points["right_elbow"]
extension_ratio = (
self.phone_detector_config.hand_crop_logic.hand_extension_ratio
)
left_hand_center = points["left_wrist"] + left_arm_vector * extension_ratio
right_hand_center = points["right_wrist"] + right_arm_vector * extension_ratio
return left_hand_center, right_hand_center
def crop_candidate_hands(
self, frame: np.ndarray, keypoints: np.ndarray, xyxy: np.ndarray
) -> dict:
"""
Generate candidate hand crops.
Returns a dictionary that contains:
- left crop
- right crop
- merged crop if hands are close
"""
hand_hw = self._compute_hand_hw(frame, xyxy)
bbox_width = abs(xyxy[2] - xyxy[0])
left_hand_center, right_hand_center = self.compute_hand_centers(keypoints)
distance_between_hands = np.linalg.norm(left_hand_center - right_hand_center)
merge_ratio_threshold = (
self.phone_detector_config.hand_crop_logic.merge_hand_distance_ratio
)
result = {
"left": None,
"right": None,
"merged": None,
"hand_hw": hand_hw,
}
if distance_between_hands > merge_ratio_threshold * bbox_width:
result["left"] = crop_frame(frame, left_hand_center, hand_hw)
result["right"] = crop_frame(frame, right_hand_center, hand_hw)
else:
merged_center = (left_hand_center + right_hand_center) // 2
result["merged"] = crop_frame(frame, merged_center, hand_hw)
return result
def get_priority_hand_crops(
self,
frame: np.ndarray,
keypoints: np.ndarray,
xyxy: np.ndarray,
) -> tuple[
tuple[np.ndarray, list[int]] | None, tuple[np.ndarray, list[int]] | None, float
]:
"""
Return:
- primary hand crop
- secondary hand crop
- spare distance ratio
This follows your current runtime logic:
choose the hand closer to the face first.
"""
points = self._extract_points(keypoints)
face_center = points["face_center"]
hand_crops = self.crop_candidate_hands(frame, keypoints, xyxy)
if hand_crops["merged"] is not None:
return hand_crops["merged"], None, 1.0
left_crop = hand_crops["left"]
right_crop = hand_crops["right"]
if left_crop is None and right_crop is None:
return None, None, 1.0
left_wrist = points["left_wrist"]
right_wrist = points["right_wrist"]
left_face_distance = np.linalg.norm(left_wrist - face_center)
right_face_distance = np.linalg.norm(right_wrist - face_center)
try:
spare_ratio = (
left_face_distance / (right_face_distance + np.finfo(np.float32).eps)
if left_face_distance <= right_face_distance
else right_face_distance
/ (left_face_distance + np.finfo(np.float32).eps)
)
except Exception:
spare_ratio = 1.0
if left_face_distance < right_face_distance:
primary_crop = left_crop
secondary_crop = right_crop
else:
primary_crop = right_crop
secondary_crop = left_crop
return primary_crop, secondary_crop, float(spare_ratio)