# Lip_Reading / preprocessing.py
# (Hugging Face page artifact — "thienphuc12339: Update preprocessing.py, commit db67496 verified")
# preprocessing.py
import cv2
import mediapipe as mp
import tensorflow as tf
class VideoPreprocessor:
    """Extract, crop, and normalize lip regions from a video for lip reading.

    Uses MediaPipe FaceMesh to locate lip landmarks per frame, crops the lip
    bounding box, resizes it to 85x85, converts to grayscale, and returns a
    z-score-normalized float32 tensor of shape (num_frames, 85, 85, 1).
    """

    def __init__(self):
        self.mp_face_mesh = mp.solutions.face_mesh
        # MediaPipe FaceMesh landmark indices outlining the outer lip contour.
        self.UPPER_LIP_INDICES = [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291]
        self.LOWER_LIP_INDICES = [146, 91, 181, 84, 17, 314, 405, 321, 375, 291]
        self.LIP_INDICES = self.UPPER_LIP_INDICES + self.LOWER_LIP_INDICES

    def preprocess_video(self, video_path):
        """Read *video_path* and return normalized lip-crop frames.

        Args:
            video_path: Path to a video file readable by OpenCV.

        Returns:
            A float32 tensor of shape (num_frames, 85, 85, 1), normalized to
            zero mean and unit variance across the whole clip, or ``None`` if
            no lip frames could be extracted.
        """
        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            with self.mp_face_mesh.FaceMesh(
                static_image_mode=False,
                max_num_faces=1,
                refine_landmarks=True,
                min_detection_confidence=0.5,
                min_tracking_confidence=0.5,
            ) as face_mesh:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    # MediaPipe expects RGB; OpenCV decodes as BGR.
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    results = face_mesh.process(rgb_frame)
                    if not results.multi_face_landmarks:
                        print("No face landmarks detected in frame.")
                        continue
                    # Only the first (single) detected face is used.
                    face_landmarks = results.multi_face_landmarks[0]
                    try:
                        lip_landmarks = [face_landmarks.landmark[i] for i in self.LIP_INDICES]
                        # Landmark coordinates are normalized; scale to pixels
                        # and clamp the bounding box to the image bounds.
                        h, w, _ = frame.shape
                        x_coords = [int(lm.x * w) for lm in lip_landmarks]
                        y_coords = [int(lm.y * h) for lm in lip_landmarks]
                        x_min, x_max = max(0, min(x_coords)), min(w, max(x_coords))
                        y_min, y_max = max(0, min(y_coords)), min(h, max(y_coords))
                        if x_max > x_min and y_max > y_min:
                            # BUG FIX: crop from the RGB frame. The original
                            # cropped the BGR frame and fed it to
                            # tf.image.rgb_to_grayscale, which applies RGB
                            # luminance weights — red and blue were swapped.
                            lip_frame = rgb_frame[y_min:y_max, x_min:x_max]
                            lip_frame_resized = cv2.resize(lip_frame, (85, 85))
                            lip_frame_gray = tf.image.rgb_to_grayscale(lip_frame_resized)
                            frames.append(lip_frame_gray)
                    except Exception as e:
                        # Best-effort per-frame processing: log and move on.
                        print(f"Error processing frame: {e}")
                        continue
        finally:
            # BUG FIX: release the capture even if FaceMesh raises.
            cap.release()
        if not frames:
            print("No frames extracted during preprocessing.")
            return None  # Signal failure to the caller
        # BUG FIX: cast to float32 BEFORE the statistics. The original computed
        # reduce_mean on a uint8 tensor (integer-truncated mean) and performed
        # (frames - mean) in uint8, which wraps around below zero.
        frames = tf.cast(tf.stack(frames), tf.float32)
        mean = tf.math.reduce_mean(frames)
        std = tf.math.reduce_std(frames)
        # Guard against division by zero for constant (degenerate) clips.
        normalized_frames = (frames - mean) / tf.maximum(std, 1e-6)
        return normalized_frames