# NOTE(review): the three lines below were status banners from a paste/export
# ("Spaces:" and two "Runtime error" reports), not part of the program.
# They are preserved as comments so the recovered file parses as Python.
# Spaces:
# Runtime error
# Runtime error
# preprocessing.py
import cv2
import mediapipe as mp
import tensorflow as tf


class VideoPreprocessor:
    """Extract, crop, and normalize lip regions from a video.

    Uses MediaPipe Face Mesh to locate lip landmarks per frame, crops the
    lip bounding box, resizes it to a fixed 85x85 patch, converts it to
    grayscale, and finally z-score normalizes the stacked frames.
    """

    def __init__(self):
        self.mp_face_mesh = mp.solutions.face_mesh
        # MediaPipe Face Mesh landmark indices outlining the lips.
        self.UPPER_LIP_INDICES = [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291]
        self.LOWER_LIP_INDICES = [146, 91, 181, 84, 17, 314, 405, 321, 375, 291]
        self.LIP_INDICES = self.UPPER_LIP_INDICES + self.LOWER_LIP_INDICES

    def preprocess_video(self, video_path):
        """Read a video and return a normalized tensor of grayscale lip crops.

        Args:
            video_path: Path to a video file readable by OpenCV.

        Returns:
            A float32 tensor of shape (num_frames, 85, 85, 1) with zero mean
            and unit variance, or ``None`` if no lip frames were extracted.
        """
        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            with self.mp_face_mesh.FaceMesh(
                static_image_mode=False,
                max_num_faces=1,
                refine_landmarks=True,
                min_detection_confidence=0.5,
                min_tracking_confidence=0.5,
            ) as face_mesh:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    # OpenCV decodes frames as BGR; MediaPipe (and
                    # tf.image.rgb_to_grayscale below) expect RGB.
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    results = face_mesh.process(rgb_frame)
                    if not results.multi_face_landmarks:
                        print("No face landmarks detected in frame.")
                        continue
                    # Only the first detected face is used (max_num_faces=1).
                    face_landmarks = results.multi_face_landmarks[0]
                    try:
                        lip_landmarks = [face_landmarks.landmark[i] for i in self.LIP_INDICES]
                        # Landmark coordinates are normalized to [0, 1];
                        # scale to pixels and clamp the box to the image.
                        h, w, _ = frame.shape
                        x_coords = [int(landmark.x * w) for landmark in lip_landmarks]
                        y_coords = [int(landmark.y * h) for landmark in lip_landmarks]
                        x_min, x_max = max(0, min(x_coords)), min(w, max(x_coords))
                        y_min, y_max = max(0, min(y_coords)), min(h, max(y_coords))
                        if x_max <= x_min or y_max <= y_min:
                            continue  # degenerate (empty) bounding box
                        # BUG FIX: crop from the RGB frame, not the BGR one.
                        # The original cropped `frame` (BGR) and then applied
                        # tf.image.rgb_to_grayscale, so the RGB luma weights
                        # landed on swapped red/blue channels.
                        lip_frame = rgb_frame[y_min:y_max, x_min:x_max]
                        # Resize to the fixed 85x85 model input size.
                        lip_frame_resized = cv2.resize(lip_frame, (85, 85))
                        lip_frame_gray = tf.image.rgb_to_grayscale(lip_frame_resized)
                        frames.append(lip_frame_gray)
                    except Exception as e:
                        # Best-effort per-frame processing: log and skip.
                        print(f"Error processing frame: {e}")
                        continue
        finally:
            # Release the capture even if MediaPipe raises mid-stream.
            cap.release()
        if not frames:
            print("No frames extracted during preprocessing.")
            return None  # Signal failure to the caller.
        # BUG FIX: cast to float32 BEFORE subtracting the mean. The original
        # computed reduce_mean on the uint8 tensor (truncating integer mean)
        # and `frames - mean` wrapped around modulo 256 before the cast.
        frames = tf.cast(tf.stack(frames), tf.float32)
        mean = tf.math.reduce_mean(frames)
        std = tf.math.reduce_std(frames)
        # Guard against division by zero for constant (zero-variance) input.
        std = tf.maximum(std, 1e-6)
        return (frames - mean) / std