# libras_processing/utils/mediapipe_utils.py
# NOTE: the following lines are Hugging Face upload metadata captured during
# download, preserved here as comments so the module stays valid Python:
#   cngsm's picture / Upload 2 files / 496aee2 verified
import cv2
import mediapipe as mp
import numpy as np
from tqdm import tqdm
class MediaPipeProcessor:
    """Wraps MediaPipe Holistic to extract per-frame keypoints from videos."""

    def __init__(self, config):
        # Only the 'mediapipe_config' sub-dict is used; missing keys fall back
        # to the defaults supplied in setup_mediapipe().
        self.config = config.get('mediapipe_config', {})
        self.setup_mediapipe()

    def setup_mediapipe(self):
        """Configure and instantiate the MediaPipe Holistic model."""
        self.mp_holistic = mp.solutions.holistic
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_drawing_styles = mp.solutions.drawing_styles
        self.holistic = self.mp_holistic.Holistic(
            static_image_mode=self.config.get('static_image_mode', False),
            model_complexity=self.config.get('model_complexity', 1),
            smooth_landmarks=self.config.get('smooth_landmarks', True),
            min_detection_confidence=self.config.get('min_detection_confidence', 0.5),
            min_tracking_confidence=self.config.get('min_tracking_confidence', 0.5)
        )

    def process_video(self, video_path):
        """Process a video and extract keypoints for every decoded frame.

        Args:
            video_path: path to a video file readable by OpenCV.

        Returns:
            List of dicts, one per frame, each holding any detected landmark
            arrays plus 'frame_index' and 'timestamp' (seconds).

        Raises:
            IOError: if the video cannot be opened.
        """
        cap = cv2.VideoCapture(video_path)
        # Fix: the original silently returned an empty list for an unreadable
        # path; fail loudly so callers can distinguish "no file" from "no frames".
        if not cap.isOpened():
            raise IOError(f"Could not open video: {video_path}")
        keypoints_data = []
        try:
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            print(f"Extraindo keypoints de {total_frames} frames...")
            for frame_idx in tqdm(range(total_frames), desc="Processando frames"):
                ret, frame = cap.read()
                if not ret:
                    # CAP_PROP_FRAME_COUNT is only an estimate for some codecs;
                    # stop at the real end of the stream.
                    break
                # MediaPipe expects RGB input; OpenCV decodes frames as BGR.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = self.holistic.process(frame_rgb)
                frame_keypoints = self.extract_frame_keypoints(results)
                frame_keypoints['frame_index'] = frame_idx
                frame_keypoints['timestamp'] = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
                keypoints_data.append(frame_keypoints)
        finally:
            # Fix: release the capture even if processing raises mid-video.
            cap.release()
        return keypoints_data

    def extract_frame_keypoints(self, results):
        """Collect the landmark groups detected in one Holistic result.

        Only groups actually present in `results` appear as keys; an empty
        dict means nothing was detected in the frame.
        """
        keypoints = {}
        # (output key, detected group, expected landmark count) per Holistic output.
        groups = (
            ('face_landmarks', results.face_landmarks, 468),
            ('pose_landmarks', results.pose_landmarks, 33),
            ('left_hand_landmarks', results.left_hand_landmarks, 21),
            ('right_hand_landmarks', results.right_hand_landmarks, 21),
        )
        for key, group, expected in groups:
            if group:
                keypoints[key] = self.landmarks_to_array(group.landmark, expected)
        return keypoints

    def landmarks_to_array(self, landmarks, expected_count):
        """Convert MediaPipe landmarks to an (expected_count, 3) numpy array.

        Returns an all-zero array when the landmark list is missing or has an
        unexpected length, keeping downstream array shapes consistent.
        """
        if not landmarks or len(landmarks) != expected_count:
            return np.zeros((expected_count, 3))
        return np.array([[lm.x, lm.y, lm.z] for lm in landmarks])

    def close(self):
        """Release the underlying MediaPipe Holistic resources (new, additive)."""
        if getattr(self, 'holistic', None) is not None:
            self.holistic.close()
            self.holistic = None