# libras_processing/utils/mediapipe_utils.py
# NOTE: the following lines are Hugging Face upload metadata captured during
# download, preserved here as comments so the module stays valid Python:
#   cngsm's picture / Upload 2 files / 496aee2 verified
import cv2
import mediapipe as mp
import numpy as np
from tqdm import tqdm
class MediaPipeProcessor:
    """Wraps MediaPipe Holistic to extract per-frame keypoints from videos."""

    def __init__(self, config):
        # Only the 'mediapipe_config' sub-dict is used; missing keys fall back
        # to the defaults supplied in setup_mediapipe().
        self.config = config.get('mediapipe_config', {})
        self.setup_mediapipe()

    def setup_mediapipe(self):
        """Configure and instantiate the MediaPipe Holistic model."""
        self.mp_holistic = mp.solutions.holistic
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_drawing_styles = mp.solutions.drawing_styles
        self.holistic = self.mp_holistic.Holistic(
            static_image_mode=self.config.get('static_image_mode', False),
            model_complexity=self.config.get('model_complexity', 1),
            smooth_landmarks=self.config.get('smooth_landmarks', True),
            min_detection_confidence=self.config.get('min_detection_confidence', 0.5),
            min_tracking_confidence=self.config.get('min_tracking_confidence', 0.5)
        )

    def process_video(self, video_path):
        """Process a video and extract keypoints for every decoded frame.

        Args:
            video_path: path to a video file readable by OpenCV.

        Returns:
            List of dicts, one per frame, each holding any detected landmark
            arrays plus 'frame_index' and 'timestamp' (seconds).

        Raises:
            IOError: if the video cannot be opened.
        """
        cap = cv2.VideoCapture(video_path)
        # Fix: the original silently returned an empty list for an unreadable
        # path; fail loudly so callers can distinguish "no file" from "no frames".
        if not cap.isOpened():
            raise IOError(f"Could not open video: {video_path}")
        keypoints_data = []
        try:
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            print(f"Extraindo keypoints de {total_frames} frames...")
            for frame_idx in tqdm(range(total_frames), desc="Processando frames"):
                ret, frame = cap.read()
                if not ret:
                    # CAP_PROP_FRAME_COUNT is only an estimate for some codecs;
                    # stop at the real end of the stream.
                    break
                # MediaPipe expects RGB input; OpenCV decodes frames as BGR.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = self.holistic.process(frame_rgb)
                frame_keypoints = self.extract_frame_keypoints(results)
                frame_keypoints['frame_index'] = frame_idx
                frame_keypoints['timestamp'] = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
                keypoints_data.append(frame_keypoints)
        finally:
            # Fix: release the capture even if processing raises mid-video.
            cap.release()
        return keypoints_data

    def extract_frame_keypoints(self, results):
        """Collect the landmark groups detected in one Holistic result.

        Only groups actually present in `results` appear as keys; an empty
        dict means nothing was detected in the frame.
        """
        keypoints = {}
        # (output key, detected group, expected landmark count) per Holistic output.
        groups = (
            ('face_landmarks', results.face_landmarks, 468),
            ('pose_landmarks', results.pose_landmarks, 33),
            ('left_hand_landmarks', results.left_hand_landmarks, 21),
            ('right_hand_landmarks', results.right_hand_landmarks, 21),
        )
        for key, group, expected in groups:
            if group:
                keypoints[key] = self.landmarks_to_array(group.landmark, expected)
        return keypoints

    def landmarks_to_array(self, landmarks, expected_count):
        """Convert MediaPipe landmarks to an (expected_count, 3) numpy array.

        Returns an all-zero array when the landmark list is missing or has an
        unexpected length, keeping downstream array shapes consistent.
        """
        if not landmarks or len(landmarks) != expected_count:
            return np.zeros((expected_count, 3))
        return np.array([[lm.x, lm.y, lm.z] for lm in landmarks])

    def close(self):
        """Release the underlying MediaPipe Holistic resources (new, additive)."""
        if getattr(self, 'holistic', None) is not None:
            self.holistic.close()
            self.holistic = None