Spaces:
Sleeping
Sleeping
File size: 3,298 Bytes
5322ae1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 | """
Preprocessing utilities for MediaPipe landmark extraction
"""
import cv2
import numpy as np
import mediapipe as mp
from typing import Optional, Tuple
# Short module-level aliases for the MediaPipe Holistic solution module and
# the MediaPipe drawing utilities.
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
def mediapipe_detection(image: np.ndarray, model) -> Tuple[np.ndarray, object]:
    """
    Run a MediaPipe model on a single BGR frame.

    The frame is converted to RGB before being handed to ``model.process``
    and converted back to BGR before it is returned, so the caller's array
    is never modified.

    Args:
        image: Input frame (BGR format)
        model: Object exposing ``process(image)``, e.g. a MediaPipe
            Holistic model instance

    Returns:
        Tuple of (frame converted back to BGR, detection results)
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The RGB copy is read-only only for the duration of the inference call.
    # NOTE(review): this mirrors MediaPipe's published examples, presumably
    # so the frame can be passed by reference - confirm if it matters here.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results
def _flatten_landmarks(landmark_list, fields, num_points) -> np.ndarray:
    """
    Flatten one landmark list into a 1-D feature vector.

    Args:
        landmark_list: Object exposing a ``landmark`` iterable, or a falsy
            value (e.g. None) when nothing was detected
        fields: Attribute names read from every landmark, in output order
        num_points: Number of landmarks expected; sizes the zero placeholder

    Returns:
        The requested fields of every landmark, flattened, or zeros of
        length ``num_points * len(fields)`` when ``landmark_list`` is falsy
    """
    if not landmark_list:
        # Keep the overall feature vector a fixed length even when this
        # body part was not detected.
        return np.zeros(num_points * len(fields))
    return np.array([
        [getattr(point, name) for name in fields]
        for point in landmark_list.landmark
    ]).flatten()


def extract_keypoints(results) -> np.ndarray:
    """
    Extract keypoints from MediaPipe detection results.

    Features extracted, in this order:
    - Pose landmarks: 33 points x 4 values (x, y, z, visibility) = 132 features
    - Left hand landmarks: 21 points x 3 values (x, y, z) = 63 features
    - Right hand landmarks: 21 points x 3 values (x, y, z) = 63 features
    Total: 258 features

    Any group that was not detected (its attribute on ``results`` is falsy)
    is filled with zeros, so the layout stays fixed.

    Args:
        results: Detection results exposing ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``

    Returns:
        Flattened array of 258 features
    """
    pose_fields = ("x", "y", "z", "visibility")
    hand_fields = ("x", "y", "z")
    pose_points = 33
    hand_points = 21

    pose = _flatten_landmarks(results.pose_landmarks, pose_fields, pose_points)
    lh = _flatten_landmarks(results.left_hand_landmarks, hand_fields, hand_points)
    rh = _flatten_landmarks(results.right_hand_landmarks, hand_fields, hand_points)
    return np.concatenate([pose, lh, rh])
def process_frame(frame: np.ndarray, holistic_model) -> Optional[np.ndarray]:
    """
    Run detection on one frame and return its keypoints when a hand is seen.

    Args:
        frame: Input frame (BGR format)
        holistic_model: MediaPipe Holistic model instance

    Returns:
        Keypoints array (258 features), or None when neither the left nor
        the right hand was detected
    """
    # Only the detection results are needed; the returned frame is discarded.
    detection = mediapipe_detection(frame, holistic_model)[1]

    hand_found = detection.left_hand_landmarks or detection.right_hand_landmarks
    if not hand_found:
        return None

    return extract_keypoints(detection)
def decode_base64_image(base64_string: str) -> Optional[np.ndarray]:
    """
    Decode a base64 string (optionally a data URL) into an image array.

    Args:
        base64_string: Base64 encoded image, with or without a leading
            ``data:<mime>;base64,`` style prefix

    Returns:
        Image as decoded by ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``, or
        None when the payload is empty or OpenCV cannot decode it
    """
    import base64

    # Strip a data URL header if present. Splitting once keeps the whole
    # payload after the first comma instead of only the second
    # comma-separated field.
    if ',' in base64_string:
        base64_string = base64_string.split(',', 1)[1]

    img_bytes = base64.b64decode(base64_string)

    # An empty payload can never be an image; short-circuit so the caller
    # gets the same None as for any other undecodable input instead of
    # handing OpenCV an empty buffer.
    if not img_bytes:
        return None

    # View the raw bytes as a uint8 buffer, the input form imdecode expects.
    nparr = np.frombuffer(img_bytes, np.uint8)
    return cv2.imdecode(nparr, cv2.IMREAD_COLOR)
|