import cv2
import mediapipe as mp
import pandas as pd
import numpy as np


def generate_column_names():
    """
    Generate column names for a DataFrame that will store landmark coordinates.

    Column names are formatted as '{coordinate}_{landmark_type}_{landmark_index}'.

    Returns:
        list: A list of strings representing the column names.
    """
    columns = ['frame']
    # Face columns: the MediaPipe face mesh contains 468 landmarks.
    for coordinate in ['x', 'y']:
        for i in range(468):
            columns.append(f'{coordinate}_face_{i}')
    # Hand columns: the MediaPipe hand model contains 21 landmarks per hand.
    for hand in ['left_hand', 'right_hand']:
        for coordinate in ['x', 'y']:
            for i in range(21):
                columns.append(f'{coordinate}_{hand}_{i}')
    return columns
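
# A quick sanity check on the schema (a sketch, not part of the pipeline):
# the layout is one 'frame' column, 2 * 468 face columns, and 2 * 2 * 21 hand
# columns, i.e. 1,021 names in total, with x-columns before y-columns in each
# group.
#
# cols = generate_column_names()
# assert len(cols) == 1 + 2 * 468 + 2 * 2 * 21  # 1021
# assert cols[:3] == ['frame', 'x_face_0', 'x_face_1']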

def video_to_landmarks(video_path, columns):
    """
    Extract face and hand landmarks from a video and store them in a DataFrame.

    The video is processed frame by frame. For each frame, face and hand
    landmarks are detected using MediaPipe's face mesh and hands models,
    respectively, and their normalized coordinates are stored in a DataFrame.

    Parameters:
        video_path (str): Path to the video file.
        columns (list): List of column names for the DataFrame.

    Returns:
        pd.DataFrame: A DataFrame where each row corresponds to a frame and
            each column corresponds to a landmark coordinate.
    """
    mp_face_mesh = mp.solutions.face_mesh
    mp_hands = mp.solutions.hands
    cap = cv2.VideoCapture(video_path)
    rows = []
    with mp_face_mesh.FaceMesh() as face_mesh, mp_hands.Hands(max_num_hands=2) as hands:
        frame_count = 0
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                break
            # MediaPipe expects RGB input; OpenCV reads frames as BGR.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results_face = face_mesh.process(rgb_frame)
            results_hands = hands.process(rgb_frame)
            # Initialize the row with NaNs so undetected landmarks stay missing.
            frame_data = {column: np.nan for column in columns}
            frame_data['frame'] = frame_count
            # Process face landmarks.
            if results_face.multi_face_landmarks:
                for face_landmarks in results_face.multi_face_landmarks:
                    for i, landmark in enumerate(face_landmarks.landmark):
                        frame_data[f'x_face_{i}'] = landmark.x
                        frame_data[f'y_face_{i}'] = landmark.y
            # Process hand landmarks, using MediaPipe's handedness classification
            # rather than guessing from wrist/thumb geometry. Note that the
            # labels assume a mirrored (selfie-view) image.
            if results_hands.multi_hand_landmarks:
                for hand_landmarks, handedness in zip(results_hands.multi_hand_landmarks,
                                                      results_hands.multi_handedness):
                    label = handedness.classification[0].label  # 'Left' or 'Right'
                    hand_type = 'left_hand' if label == 'Left' else 'right_hand'
                    for i, landmark in enumerate(hand_landmarks.landmark):
                        frame_data[f'x_{hand_type}_{i}'] = landmark.x
                        frame_data[f'y_{hand_type}_{i}'] = landmark.y
            # Collect rows in a list; appending to a DataFrame per frame is
            # quadratic, and DataFrame._append is a private pandas method.
            rows.append(frame_data)
            frame_count += 1
    cap.release()
    return pd.DataFrame(rows, columns=columns)

# Example usage:
# video_path = "videoplayback_with_landmarks.mp4"
# df = video_to_landmarks(video_path, generate_column_names())
#
# # Save the DataFrame to a CSV file
# df.to_csv('landmarks.csv', index=False)
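
# A minimal follow-up sketch for checking detection coverage once the CSV
# exists (assumes 'landmarks.csv' was written by the commented code above):
# df = pd.read_csv('landmarks.csv')
# for group in ['face', 'left_hand', 'right_hand']:
#     detected = df.filter(like=f'x_{group}_').notna().any(axis=1).mean()
#     print(f'{group}: detected in {detected:.1%} of frames')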