Spaces:

thienphuc12339
/

Lip_Reading

Running

App Files Community

thienphuc12339 committed on Nov 27, 2024

Commit

8a07d8a

verified ·

1 Parent(s): 0479ad8

Update preprocessing.py

Browse files

Files changed (1) hide show

preprocessing.py +17 -52

preprocessing.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import cv2
 import mediapipe as mp
-import numpy as np
 import tensorflow as tf
 class VideoPreprocessor:
@@ -11,29 +12,10 @@ class VideoPreprocessor:
         self.LOWER_LIP_INDICES = [146, 91, 181, 84, 17, 314, 405, 321, 375, 291]
         self.LIP_INDICES = self.UPPER_LIP_INDICES + self.LOWER_LIP_INDICES
-    def preprocess_video(self, video_path, frame_interval=2, max_frames=100):
         cap = cv2.VideoCapture(video_path)
         frames = []
-        frame_counter = 0
-        processed_frames = 0
-        # Check if video opened successfully
-        if not cap.isOpened():
-            print(f"Cannot open video file: {video_path}")
-            return None
-        # Video properties
-        fps = cap.get(cv2.CAP_PROP_FPS)
-        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        duration = frame_count / fps if fps > 0 else 0
-        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
-        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
-        print(f"Video properties: FPS={fps}, Frame count={frame_count}, Duration={duration}s, Width={width}, Height={height}")
-        # Desired frame size
-        desired_width = 640
-        desired_height = 480
         # Utilize mediapipe's GPU acceleration if available
         with self.mp_face_mesh.FaceMesh(
             static_image_mode=False,
@@ -47,15 +29,6 @@ class VideoPreprocessor:
                 if not ret:
                     break
-                if frame_counter % frame_interval != 0:
-                    frame_counter += 1
-                    continue
-                frame_counter += 1
-                # Resize frame to desired dimensions
-                frame = cv2.resize(frame, (desired_width, desired_height))
                 # Convert the BGR image to RGB
                 rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
@@ -85,36 +58,28 @@ class VideoPreprocessor:
                             # Resize to 85x85 pixels
                             lip_frame_resized = cv2.resize(lip_frame, (85, 85))
-                            # Convert to grayscale using OpenCV
-                            lip_frame_gray = cv2.cvtColor(lip_frame_resized, cv2.COLOR_BGR2GRAY)
                             frames.append(lip_frame_gray)
-                            processed_frames += 1
-                            if processed_frames >= max_frames:
-                                break
                     except Exception as e:
                         print(f"Error processing frame: {e}")
                         continue  # Skip this frame
                 else:
                     print("No face landmarks detected in frame.")
-            cap.release()
-            if not frames:
-                print("No frames extracted during preprocessing.")
-                return None  # Return None to indicate failure
-            # Convert frames to NumPy array
-            frames = np.array(frames)
-            # Normalize the frames
-            mean = np.mean(frames)
-            std = np.std(frames)
-            normalized_frames = (frames - mean) / std
-            # Add channel dimension and convert to TensorFlow tensor
-            normalized_frames = np.expand_dims(normalized_frames, axis=-1)
-            normalized_frames = tf.convert_to_tensor(normalized_frames, dtype=tf.float32)
-            return normalized_frames

+# Updated preprocessing.py
 import cv2
 import mediapipe as mp
 import tensorflow as tf
 class VideoPreprocessor:
         self.LOWER_LIP_INDICES = [146, 91, 181, 84, 17, 314, 405, 321, 375, 291]
         self.LIP_INDICES = self.UPPER_LIP_INDICES + self.LOWER_LIP_INDICES
+    def preprocess_video(self, video_path):
         cap = cv2.VideoCapture(video_path)
         frames = []
         # Utilize mediapipe's GPU acceleration if available
         with self.mp_face_mesh.FaceMesh(
             static_image_mode=False,
                 if not ret:
                     break
                 # Convert the BGR image to RGB
                 rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                             # Resize to 85x85 pixels
                             lip_frame_resized = cv2.resize(lip_frame, (85, 85))
+                            # Convert to grayscale using TensorFlow
+                            lip_frame_gray = tf.image.rgb_to_grayscale(lip_frame_resized)
                             frames.append(lip_frame_gray)
                     except Exception as e:
                         print(f"Error processing frame: {e}")
                         continue  # Skip this frame
                 else:
                     print("No face landmarks detected in frame.")
+        cap.release()
+        if not frames:
+            print("No frames extracted during preprocessing.")
+            return None  # Return None to indicate failure
+        # Stack frames into a tensor
+        frames = tf.stack(frames)
+        # Normalize the frames
+        mean = tf.math.reduce_mean(frames)
+        std = tf.math.reduce_std(tf.cast(frames, tf.float32))
+        normalized_frames = tf.cast((frames - mean), tf.float32) / std
+        return normalized_frames