Spaces:

Zeqhx
/

MSL-Project

Sleeping

File size: 3,298 Bytes

5322ae1

"""
Preprocessing utilities for MediaPipe landmark extraction
"""
import cv2
import numpy as np
import mediapipe as mp
from typing import Optional, Tuple


# Module-level aliases for the MediaPipe solution submodules.
#   mp_holistic -> mp.solutions.holistic
#   mp_drawing  -> mp.solutions.drawing_utils
# NOTE(review): neither alias is referenced by the functions visible in this
# file; presumably other modules import them from here -- confirm before removing.
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils


def mediapipe_detection(image: np.ndarray, model) -> Tuple[np.ndarray, object]:
    """
    Run a MediaPipe model on one BGR frame.

    The frame is converted to RGB, handed to ``model.process``, and then
    converted back to BGR before being returned.

    Args:
        image: Input frame (BGR format)
        model: Object exposing ``process(rgb_image)``, e.g. a MediaPipe
            Holistic model instance

    Returns:
        A ``(bgr_image, results)`` tuple: the frame after the BGR -> RGB -> BGR
        round trip, and whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The RGB buffer is read-only for the duration of the model call and is
    # made writeable again right after (presumably so MediaPipe can avoid
    # copying the frame -- see the MediaPipe Python examples).
    rgb_frame.flags.writeable = False
    detections = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), detections


def extract_keypoints(results) -> np.ndarray:
    """
    Flatten MediaPipe detection results into one feature vector.

    Layout of the returned vector (in this order):
    - Pose: 33 points * (x, y, z, visibility) = 132 features
    - Left hand: 21 points * (x, y, z) = 63 features
    - Right hand: 21 points * (x, y, z) = 63 features
    Total: 258 features

    Any group whose landmarks are missing (falsy) is filled with zeros of
    the matching length.

    Args:
        results: MediaPipe detection results; must expose ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``

    Returns:
        Flattened array of 258 features
    """

    def _flatten(landmark_list, fields, n_points):
        # Missing detection -> zero block with the same length a real
        # detection of n_points landmarks would produce.
        if not landmark_list:
            return np.zeros(n_points * len(fields))
        rows = [
            [getattr(point, field) for field in fields]
            for point in landmark_list.landmark
        ]
        return np.array(rows).flatten()

    hand_fields = ("x", "y", "z")
    pose_vec = _flatten(
        results.pose_landmarks, ("x", "y", "z", "visibility"), 33
    )
    left_vec = _flatten(results.left_hand_landmarks, hand_fields, 21)
    right_vec = _flatten(results.right_hand_landmarks, hand_fields, 21)

    return np.concatenate([pose_vec, left_vec, right_vec])


def process_frame(frame: np.ndarray, holistic_model) -> Optional[np.ndarray]:
    """
    Run detection on one frame and return its keypoint vector.

    Args:
        frame: Input frame (BGR format)
        holistic_model: MediaPipe Holistic model instance

    Returns:
        Keypoints array (258 features) when at least one hand was detected,
        otherwise None
    """
    # The processed image is not needed here, only the detection results.
    detections = mediapipe_detection(frame, holistic_model)[1]

    # Frames without any hand are skipped entirely.
    if not detections.left_hand_landmarks and not detections.right_hand_landmarks:
        return None

    return extract_keypoints(detections)


def decode_base64_image(base64_string: str) -> np.ndarray:
    """
    Turn a base64-encoded image into an OpenCV image array.

    If the string contains a comma (as a ``data:...;base64,<payload>`` URL
    does), only the segment between the first and second comma is decoded.

    Args:
        base64_string: Base64 encoded image, with or without a data URL prefix

    Returns:
        The result of ``cv2.imdecode`` in ``IMREAD_COLOR`` mode for the
        decoded bytes
    """
    import base64

    # Drop the data URL prefix when one is present.
    segments = base64_string.split(',')
    payload = segments[1] if len(segments) > 1 else base64_string

    # base64 text -> raw bytes -> flat uint8 buffer -> decoded image
    raw_bytes = base64.b64decode(payload)
    byte_buffer = np.frombuffer(raw_bytes, np.uint8)
    return cv2.imdecode(byte_buffer, cv2.IMREAD_COLOR)