# preprocessing.py
import cv2
import mediapipe as mp
import tensorflow as tf
class VideoPreprocessor:
    """Extract, crop, and normalize mouth-region frames from a video.

    Uses MediaPipe Face Mesh to locate lip landmarks in each frame,
    crops the lip bounding box, resizes it to 85x85, converts it to
    grayscale, and returns a zero-mean / unit-variance float32 tensor
    of shape (num_frames, 85, 85, 1), or None if no usable frames were
    extracted.
    """

    def __init__(self):
        self.mp_face_mesh = mp.solutions.face_mesh
        # MediaPipe Face Mesh landmark indices for the outer lip contour.
        self.UPPER_LIP_INDICES = [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291]
        self.LOWER_LIP_INDICES = [146, 91, 181, 84, 17, 314, 405, 321, 375, 291]
        self.LIP_INDICES = self.UPPER_LIP_INDICES + self.LOWER_LIP_INDICES

    def preprocess_video(self, video_path):
        """Run the full preprocessing pipeline on one video file.

        Args:
            video_path: Path to a video file readable by OpenCV.

        Returns:
            A float32 tensor of shape (num_frames, 85, 85, 1) with
            per-clip mean/std normalization applied, or None when no
            frames could be extracted (unreadable video, no face found).
        """
        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            with self.mp_face_mesh.FaceMesh(
                static_image_mode=False,
                max_num_faces=1,
                refine_landmarks=True,
                min_detection_confidence=0.5,
                min_tracking_confidence=0.5
            ) as face_mesh:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    # MediaPipe expects RGB; OpenCV decodes as BGR.
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    results = face_mesh.process(rgb_frame)
                    if not results.multi_face_landmarks:
                        print("No face landmarks detected in frame.")
                        continue
                    # Only the first (and, with max_num_faces=1, only) face.
                    face_landmarks = results.multi_face_landmarks[0]
                    try:
                        lip_landmarks = [face_landmarks.landmark[i] for i in self.LIP_INDICES]
                        # Landmarks are normalized [0, 1]; scale to pixels
                        # and clamp the bounding box to the image.
                        h, w, _ = frame.shape
                        x_coords = [int(landmark.x * w) for landmark in lip_landmarks]
                        y_coords = [int(landmark.y * h) for landmark in lip_landmarks]
                        x_min, x_max = max(0, min(x_coords)), min(w, max(x_coords))
                        y_min, y_max = max(0, min(y_coords)), min(h, max(y_coords))
                        if x_max > x_min and y_max > y_min:
                            # BUG FIX: crop from the RGB frame, not the BGR
                            # original — tf.image.rgb_to_grayscale assumes
                            # RGB channel order for its luminance weights.
                            lip_frame = rgb_frame[y_min:y_max, x_min:x_max]
                            lip_frame_resized = cv2.resize(lip_frame, (85, 85))
                            lip_frame_gray = tf.image.rgb_to_grayscale(lip_frame_resized)
                            frames.append(lip_frame_gray)
                    except Exception as e:
                        # Best-effort per-frame processing: log and move on.
                        print(f"Error processing frame: {e}")
                        continue  # Skip this frame
        finally:
            # Always release the capture, even if MediaPipe raises.
            cap.release()
        if not frames:
            print("No frames extracted during preprocessing.")
            return None  # Return None to indicate failure
        # BUG FIX: cast to float32 BEFORE subtracting the mean. The
        # original subtracted a (truncated) uint8 mean from uint8 frames,
        # wrapping around for every pixel darker than the mean.
        frames = tf.cast(tf.stack(frames), tf.float32)
        mean = tf.math.reduce_mean(frames)
        std = tf.math.reduce_std(frames)
        # Guard against division by zero on constant-intensity clips.
        return (frames - mean) / tf.maximum(std, 1e-6)