# preprocessing.py
import cv2
import mediapipe as mp
import tensorflow as tf


class VideoPreprocessor:
    """Extract, crop, and normalize the lip region from a video.

    Uses MediaPipe Face Mesh to locate lip landmarks per frame, crops the
    bounding box around the lips, resizes each crop to 85x85, converts it to
    grayscale, and returns the whole clip as a single z-score-normalized
    float32 tensor of shape (num_frames, 85, 85, 1).
    """

    def __init__(self):
        self.mp_face_mesh = mp.solutions.face_mesh
        # MediaPipe Face Mesh landmark indices for the outer lip contour
        # (upper and lower arcs; 291 is the shared mouth corner).
        self.UPPER_LIP_INDICES = [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291]
        self.LOWER_LIP_INDICES = [146, 91, 181, 84, 17, 314, 405, 321, 375, 291]
        self.LIP_INDICES = self.UPPER_LIP_INDICES + self.LOWER_LIP_INDICES

    def preprocess_video(self, video_path):
        """Read *video_path* and return a normalized tensor of lip crops.

        Args:
            video_path: Path (str) to a video file readable by OpenCV.

        Returns:
            A float32 tensor of shape (num_frames, 85, 85, 1), z-score
            normalized over the whole clip, or ``None`` when no lip frames
            could be extracted.
        """
        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            with self.mp_face_mesh.FaceMesh(
                static_image_mode=False,
                max_num_faces=1,
                refine_landmarks=True,
                min_detection_confidence=0.5,
                min_tracking_confidence=0.5,
            ) as face_mesh:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break

                    # OpenCV decodes to BGR; MediaPipe expects RGB.
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    results = face_mesh.process(rgb_frame)

                    if not results.multi_face_landmarks:
                        print("No face landmarks detected in frame.")
                        continue

                    # Only the first (and only, max_num_faces=1) face is used.
                    face_landmarks = results.multi_face_landmarks[0]
                    try:
                        lip_landmarks = [
                            face_landmarks.landmark[i] for i in self.LIP_INDICES
                        ]

                        # Landmarks are normalized [0, 1]; scale to pixels and
                        # clamp the bounding box to the frame.
                        h, w, _ = frame.shape
                        x_coords = [int(lm.x * w) for lm in lip_landmarks]
                        y_coords = [int(lm.y * h) for lm in lip_landmarks]
                        x_min, x_max = max(0, min(x_coords)), min(w, max(x_coords))
                        y_min, y_max = max(0, min(y_coords)), min(h, max(y_coords))

                        if x_max > x_min and y_max > y_min:
                            # BUG FIX: crop from the RGB frame, not the BGR one.
                            # tf.image.rgb_to_grayscale weights channels as
                            # R=0.299/G=0.587/B=0.114; feeding it BGR applied
                            # the red weight to the blue channel (and vice
                            # versa), skewing the grayscale output.
                            lip_frame = rgb_frame[y_min:y_max, x_min:x_max]
                            lip_frame_resized = cv2.resize(lip_frame, (85, 85))
                            lip_frame_gray = tf.image.rgb_to_grayscale(lip_frame_resized)
                            frames.append(lip_frame_gray)
                    except Exception as e:
                        # Best-effort per frame: log and move on so one bad
                        # frame does not abort the whole clip.
                        print(f"Error processing frame: {e}")
                        continue
        finally:
            # BUG FIX: release the capture even if the loop raises.
            cap.release()

        if not frames:
            print("No frames extracted during preprocessing.")
            return None  # Signal failure to the caller.

        # BUG FIX: the original computed the mean on the uint8 stack (integer,
        # truncating mean) and subtracted it in uint8, where values below the
        # mean wrapped modulo 256 before the float cast. Cast once to float32,
        # then normalize.
        frames = tf.cast(tf.stack(frames), tf.float32)
        mean = tf.math.reduce_mean(frames)
        std = tf.math.reduce_std(frames)
        # Floor std to avoid NaN/inf on a constant-intensity clip.
        return (frames - mean) / tf.maximum(std, 1e-6)