""" Preprocessing utilities for MediaPipe landmark extraction """ import cv2 import numpy as np import mediapipe as mp from typing import Optional, Tuple # Initialize MediaPipe mp_holistic = mp.solutions.holistic mp_drawing = mp.solutions.drawing_utils def mediapipe_detection(image: np.ndarray, model) -> Tuple[np.ndarray, object]: """ Process image with MediaPipe Holistic model. Args: image: Input frame (BGR format) model: MediaPipe Holistic model instance Returns: Processed image and detection results """ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) image.flags.writeable = False results = model.process(image) image.flags.writeable = True image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) return image, results def extract_keypoints(results) -> np.ndarray: """ Extract keypoints from MediaPipe detection results. Features extracted: - Pose landmarks: 33 points × 4 values (x, y, z, visibility) = 132 features - Left hand landmarks: 21 points × 3 values (x, y, z) = 63 features - Right hand landmarks: 21 points × 3 values (x, y, z) = 63 features Total: 258 features Args: results: MediaPipe detection results Returns: Flattened array of 258 features """ # Extract pose landmarks (33 points × 4 features = 132) pose = np.array([ [res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark ]).flatten() if results.pose_landmarks else np.zeros(33 * 4) # Extract left hand landmarks (21 points × 3 features = 63) lh = np.array([ [res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark ]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3) # Extract right hand landmarks (21 points × 3 features = 63) rh = np.array([ [res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark ]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3) return np.concatenate([pose, lh, rh]) def process_frame(frame: np.ndarray, holistic_model) -> Optional[np.ndarray]: """ Process a single frame and extract keypoints. Args: frame: Input frame (BGR format) holistic_model: MediaPipe Holistic model instance Returns: Keypoints array (258 features) or None if no hands detected """ _, results = mediapipe_detection(frame, holistic_model) # Only process if at least one hand is detected if results.left_hand_landmarks or results.right_hand_landmarks: keypoints = extract_keypoints(results) return keypoints return None def decode_base64_image(base64_string: str) -> np.ndarray: """ Decode base64 string to numpy array (image). Args: base64_string: Base64 encoded image Returns: Decoded image as numpy array """ import base64 # Remove data URL prefix if present if ',' in base64_string: base64_string = base64_string.split(',')[1] # Decode base64 to bytes img_bytes = base64.b64decode(base64_string) # Convert bytes to numpy array nparr = np.frombuffer(img_bytes, np.uint8) # Decode image img = cv2.imdecode(nparr, cv2.IMREAD_COLOR) return img