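"""Keypoint extraction with MediaPipe Holistic: collects face, pose, and hand landmarks per video frame."""
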
import cv2
import mediapipe as mp
import numpy as np
from tqdm import tqdm

class MediaPipeProcessor:
    def __init__(self, config):
        self.config = config.get('mediapipe_config', {})
        self.setup_mediapipe()
    
    def setup_mediapipe(self):
        """Configura os modelos do MediaPipe"""
        self.mp_holistic = mp.solutions.holistic
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_drawing_styles = mp.solutions.drawing_styles
        
        self.holistic = self.mp_holistic.Holistic(
            static_image_mode=self.config.get('static_image_mode', False),
            model_complexity=self.config.get('model_complexity', 1),
            smooth_landmarks=self.config.get('smooth_landmarks', True),
            min_detection_confidence=self.config.get('min_detection_confidence', 0.5),
            min_tracking_confidence=self.config.get('min_tracking_confidence', 0.5)
        )
    
    def process_video(self, video_path):
        """Process the video and extract keypoints for every frame."""
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            raise IOError(f"Could not open video: {video_path}")
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        keypoints_data = []
        
        print(f"Extracting keypoints from {total_frames} frames...")
        
        for frame_idx in tqdm(range(total_frames), desc="Processing frames"):
            ret, frame = cap.read()
            if not ret:
                break
            
            # Convert BGR to RGB (MediaPipe expects RGB input)
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = self.holistic.process(frame_rgb)
            
            frame_keypoints = self.extract_frame_keypoints(results)
            frame_keypoints['frame_index'] = frame_idx
            frame_keypoints['timestamp'] = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
            
            keypoints_data.append(frame_keypoints)
        
        cap.release()
        return keypoints_data
    
    def extract_frame_keypoints(self, results):
        """Extrai keypoints de um frame"""
        keypoints = {}
        
        # Face landmarks
        if results.face_landmarks:
            keypoints['face_landmarks'] = self.landmarks_to_array(
                results.face_landmarks.landmark, 468
            )
        
        # Pose landmarks
        if results.pose_landmarks:
            keypoints['pose_landmarks'] = self.landmarks_to_array(
                results.pose_landmarks.landmark, 33
            )
        
        # Left hand landmarks
        if results.left_hand_landmarks:
            keypoints['left_hand_landmarks'] = self.landmarks_to_array(
                results.left_hand_landmarks.landmark, 21
            )
        
        # Right hand landmarks
        if results.right_hand_landmarks:
            keypoints['right_hand_landmarks'] = self.landmarks_to_array(
                results.right_hand_landmarks.landmark, 21
            )
        
        return keypoints
    
    def landmarks_to_array(self, landmarks, expected_count):
        """Convert MediaPipe landmarks to an (expected_count, 3) numpy array."""
        # Fall back to zeros when the landmark list is missing or malformed.
        if not landmarks or len(landmarks) != expected_count:
            return np.zeros((expected_count, 3))
        
        return np.array([[lm.x, lm.y, lm.z] for lm in landmarks])
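

# Minimal usage sketch. Assumptions: "video.mp4" and the config values below are
# placeholders for illustration, not files or defaults defined in this repository.
if __name__ == "__main__":
    config = {
        'mediapipe_config': {
            'model_complexity': 1,
            'min_detection_confidence': 0.5,
            'min_tracking_confidence': 0.5,
        }
    }
    processor = MediaPipeProcessor(config)
    keypoints = processor.process_video("video.mp4")
    print(f"Extracted keypoints for {len(keypoints)} frames")
    # Holistic exposes close() (inherited from SolutionBase) to free the graph.
    processor.holistic.close()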