Upload 12 files
Browse files
- .gitattributes +3 -0
- best_model_sequences.keras +3 -0
- data_preprocessing_sequences.py +174 -0
- dataset_preparation_sequences.py +116 -0
- dataset_sequences.pkl +3 -0
- final_model_sequences.keras +3 -0
- frame_extraction.py +61 -0
- history_sequences.pkl +3 -0
- model_building_sequences.py +110 -0
- model_evaluation_sequences.py +123 -0
- prediction_sequences.py +284 -0
- shape_predictor_68_face_landmarks.dat +3 -0
- video_capture.py +62 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
best_model_sequences.keras filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
final_model_sequences.keras filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
shape_predictor_68_face_landmarks.dat filter=lfs diff=lfs merge=lfs -text
|
best_model_sequences.keras
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80f527be8dbdcfb36488d84cecad86368a1c8c4f618eafd204e6f23bcbef42ed
|
| 3 |
+
size 342613105
|
data_preprocessing_sequences.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# data_preprocessing_sequences.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import cv2
|
| 5 |
+
import dlib
|
| 6 |
+
import numpy as np
|
| 7 |
+
from imutils import face_utils
|
| 8 |
+
from tqdm import tqdm
|
| 9 |
+
import pickle
|
| 10 |
+
|
| 11 |
+
def get_facial_landmarks(detector, predictor, image):
    """
    Locate the eye and eyebrow landmarks of the first face found in an image.

    Args:
        detector: dlib face detector (called as ``detector(gray, 1)``).
        predictor: dlib shape predictor (called as ``predictor(gray, rect)``).
        image (numpy.ndarray): Input image; converted with ``COLOR_BGR2GRAY``.

    Returns:
        dict | None: Landmark coordinate arrays under the keys 'left_eye',
        'right_eye', 'left_eyebrow' and 'right_eyebrow', or None when the
        detector returns no face.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = detector(grayscale, 1)

    if len(faces) == 0:
        return None  # No face detected

    # Only the first detection is used; any further faces are ignored.
    points = face_utils.shape_to_np(predictor(grayscale, faces[0]))

    # Index ranges into the landmark array, keyed by the region names that
    # the rest of the pipeline looks up.
    region_slices = (
        ('left_eye', slice(36, 42)),
        ('right_eye', slice(42, 48)),
        ('left_eyebrow', slice(17, 22)),
        ('right_eyebrow', slice(22, 27)),
    )
    return {name: points[span] for name, span in region_slices}
|
| 42 |
+
|
| 43 |
+
def extract_roi(image, landmarks, region='left_eye', padding=5):
    """
    Extracts a region of interest (ROI) from the image based on landmarks.

    Args:
        image (numpy.ndarray): Input image.
        landmarks (dict): Facial landmarks keyed by region name.
        region (str): Region to extract ('left_eye', 'right_eye', 'left_eyebrow', 'right_eyebrow').
        padding (int): Padding, in pixels, added around the landmark bounding box.

    Returns:
        numpy.ndarray | None: The cropped ROI (a view into ``image``), or None
        when the region has no landmarks or the crop falls entirely outside
        the image.
    """
    points = landmarks.get(region)
    if points is None or len(points) == 0:
        return None

    # Compute the padded bounding box. The top-left corner is clamped to the
    # image origin; the bottom-right corner is clipped implicitly by slicing.
    x, y, w, h = cv2.boundingRect(points)
    x = max(x - padding, 0)
    y = max(y - padding, 0)
    w = w + 2 * padding
    h = h + 2 * padding

    roi = image[y:y + h, x:x + w]

    # Landmarks predicted beyond the right/bottom border produce a zero-size
    # slice; report that as "no ROI" instead of handing callers an empty array
    # that would make cv2.cvtColor / cv2.resize fail.
    if roi.size == 0:
        return None
    return roi
|
| 69 |
+
|
| 70 |
+
def preprocess_video_sequence(sequence_dir, detector, predictor, img_size=(64, 64)):
    """
    Preprocesses a sequence of frames from a video.

    Frames are processed in natural (numeric-aware) filename order so that
    ``..._frame_2.jpg`` comes before ``..._frame_10.jpg``; plain lexicographic
    sorting would scramble the temporal order of non-zero-padded frame names.

    Args:
        sequence_dir (str): Directory containing frames of a video.
        detector: dlib face detector.
        predictor: dlib shape predictor.
        img_size (tuple): Size passed to ``cv2.resize`` for every ROI.

    Returns:
        list: One numpy array per usable frame; each array is the four
        grayscale, [0, 1]-normalized ROIs concatenated horizontally. Frames
        that cannot be read, have no detected face, or are missing any ROI
        are skipped.
    """
    import re  # local import: `re` is not part of this module's import block

    def _natural_key(name):
        # re.split with a capturing group alternates text / digit runs, so
        # tokens at the same position always have the same type and compare
        # safely. The raw name breaks ties deterministically.
        tokens = [int(tok) if tok.isdigit() else tok.lower()
                  for tok in re.split(r'(\d+)', name)]
        return tokens, name

    regions = ('left_eye', 'right_eye', 'left_eyebrow', 'right_eyebrow')

    frames = sorted(
        (f for f in os.listdir(sequence_dir) if f.lower().endswith(('.jpg', '.png'))),
        key=_natural_key,
    )
    preprocessed_sequence = []

    for frame_name in frames:
        image = cv2.imread(os.path.join(sequence_dir, frame_name))
        if image is None:
            continue  # Unreadable or corrupt image file

        landmarks = get_facial_landmarks(detector, predictor, image)
        if landmarks is None:
            continue  # Skip frames with no detected face

        roi_images = []
        for region in regions:
            roi = extract_roi(image, landmarks, region)
            if roi is None or roi.size == 0:
                # A frame with a missing ROI would be narrower than the others
                # and break stacking later, so drop the whole frame.
                break
            roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
            roi = cv2.resize(roi, img_size)
            roi = roi.astype('float32') / 255.0  # Normalize to [0,1]
            roi_images.append(np.expand_dims(roi, axis=-1))  # Add channel dimension
        else:
            # All four ROIs were extracted: tile them side by side.
            preprocessed_sequence.append(np.hstack(roi_images))

    return preprocessed_sequence
|
| 122 |
+
|
| 123 |
+
def preprocess_dataset(dataset_dir='dataset', output_dir='preprocessed_sequences', img_size=(64, 64)):
    """
    Preprocesses the entire dataset by processing each video sequence.

    Expects ``dataset_dir/<class>/<sequence>/<frame images>`` and writes one
    ``<sequence>.npy`` array per non-empty sequence to ``output_dir/<class>/``.

    Args:
        dataset_dir (str): Directory containing labeled data.
        output_dir (str): Directory to save preprocessed sequences.
        img_size (tuple): Desired image size for ROIs.

    Returns:
        None. If the dlib landmark model file is missing from the current
        working directory, an error is printed and nothing is processed.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Check for the landmark model before constructing any dlib objects so a
    # missing file is reported without doing unnecessary work.
    predictor_path = 'shape_predictor_68_face_landmarks.dat'
    if not os.path.exists(predictor_path):
        print(f"Error: {predictor_path} not found. Download it from http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2")
        return

    # Initialize dlib's face detector and landmark predictor
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(predictor_path)

    # Sorted traversal keeps runs reproducible (os.listdir order is arbitrary).
    for cls in sorted(os.listdir(dataset_dir)):
        cls_path = os.path.join(dataset_dir, cls)
        if not os.path.isdir(cls_path):
            continue
        output_cls_dir = os.path.join(output_dir, cls)
        os.makedirs(output_cls_dir, exist_ok=True)

        print(f"Processing class: {cls}")
        for seq in tqdm(sorted(os.listdir(cls_path)), desc=f"Class {cls}"):
            seq_path = os.path.join(cls_path, seq)
            if not os.path.isdir(seq_path):
                continue
            preprocessed_sequence = preprocess_video_sequence(seq_path, detector, predictor, img_size=img_size)
            if len(preprocessed_sequence) == 0:
                continue  # Skip sequences with no valid frames

            # Stack frames along a new leading axis and save as a numpy file.
            sequence_array = np.stack(preprocessed_sequence, axis=0)
            np.save(os.path.join(output_cls_dir, f"{seq}.npy"), sequence_array)

    print("Data preprocessing completed.")


if __name__ == "__main__":
    preprocess_dataset(dataset_dir='dataset', output_dir='preprocessed_sequences', img_size=(64, 64))
|
dataset_preparation_sequences.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# dataset_preparation_sequences.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import numpy as np
|
| 5 |
+
from sklearn.model_selection import train_test_split
|
| 6 |
+
from tensorflow.keras.utils import to_categorical
|
| 7 |
+
import pickle
|
| 8 |
+
|
| 9 |
+
def load_sequences(preprocessed_dir='preprocessed_sequences'):
    """
    Loads preprocessed sequences and their labels.

    Only sub-directories of ``preprocessed_dir`` are treated as classes, so
    stray files (e.g. ``.DS_Store``) no longer consume a class index or
    inflate the one-hot width. Classes and sequence files are visited in
    sorted order so the sample order is reproducible.

    Args:
        preprocessed_dir (str): Directory containing preprocessed sequences.

    Returns:
        tuple: ``(X, y, label_map)`` where ``X`` is a list of arrays loaded
        from the ``.npy`` files, ``y`` is the one-hot label array produced by
        ``to_categorical``, and ``label_map`` maps class name -> class index.
    """
    X = []
    y = []
    label_map = {}

    class_names = sorted(
        entry for entry in os.listdir(preprocessed_dir)
        if os.path.isdir(os.path.join(preprocessed_dir, entry))
    )

    for idx, cls in enumerate(class_names):
        label_map[cls] = idx
        cls_path = os.path.join(preprocessed_dir, cls)
        for seq_file in sorted(f for f in os.listdir(cls_path) if f.endswith('.npy')):
            X.append(np.load(os.path.join(cls_path, seq_file)))
            y.append(idx)

    # X remains a list of numpy arrays with varying shapes
    y = to_categorical(np.array(y), num_classes=len(label_map))

    return X, y, label_map
|
| 41 |
+
|
| 42 |
+
def pad_sequences_fixed(X, max_seq_length):
    """
    Bring every sequence to exactly ``max_seq_length`` frames.

    Shorter sequences get all-zero frames appended at the end; longer ones
    keep only their first ``max_seq_length`` frames.

    Args:
        X (list of numpy.ndarray): Sequences whose first axis is the frame axis.
        max_seq_length (int): Desired sequence length.

    Returns:
        numpy.ndarray: ``np.array`` of the padded/truncated sequences.
    """
    def _fit_length(frames):
        missing = max_seq_length - frames.shape[0]
        if missing <= 0:
            return frames[:max_seq_length]
        # Zero frames with the same per-frame shape and dtype as the input.
        filler = np.zeros((missing,) + tuple(frames.shape[1:]), dtype=frames.dtype)
        return np.concatenate((frames, filler), axis=0)

    return np.array([_fit_length(seq) for seq in X])
|
| 63 |
+
|
| 64 |
+
def save_dataset(X_train, X_test, y_train, y_test, label_map, output_path='dataset_sequences.pkl'):
    """
    Pickle the train/test split and the label mapping into a single file.

    Args:
        X_train, X_test, y_train, y_test: Split data.
        label_map (dict): Mapping from class names to indices.
        output_path (str): Path of the pickle file to write.
    """
    payload = {
        'X_train': X_train,
        'X_test': X_test,
        'y_train': y_train,
        'y_test': y_test,
        'label_map': label_map,
    }
    with open(output_path, 'wb') as handle:
        pickle.dump(payload, handle)
    print(f"Dataset saved to {output_path}.")
|
| 82 |
+
|
| 83 |
+
def load_dataset_pickle(pickle_path='dataset_sequences.pkl'):
    """
    Read back a dataset written by ``save_dataset``.

    NOTE: ``pickle.load`` can execute arbitrary code; only open files you trust.

    Args:
        pickle_path (str): Path to the pickle file.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test, label_map)``.
    """
    with open(pickle_path, 'rb') as handle:
        stored = pickle.load(handle)
    wanted = ('X_train', 'X_test', 'y_train', 'y_test', 'label_map')
    return tuple(stored[key] for key in wanted)
|
| 96 |
+
|
| 97 |
+
if __name__ == "__main__":
    # Load sequences
    X, y, label_map = load_sequences(preprocessed_dir='preprocessed_sequences')
    print(f"Total samples: {len(X)}")

    # Without this guard, max() below fails with an opaque
    # "max() arg is an empty sequence" when preprocessing produced nothing.
    if not X:
        raise SystemExit(
            "No preprocessed sequences found in 'preprocessed_sequences'. "
            "Run data_preprocessing_sequences.py first."
        )

    # Find the maximum sequence length for padding
    max_seq_length = max(seq.shape[0] for seq in X)
    print(f"Maximum sequence length: {max_seq_length}")

    # Pad sequences to have the same length
    X_padded = pad_sequences_fixed(X, max_seq_length)
    print(f"Padded sequences shape: {X_padded.shape}")

    # Split into training and testing sets (fixed seed for a repeatable split)
    X_train, X_test, y_train, y_test = train_test_split(X_padded, y, test_size=0.2, random_state=42)
    print(f"Training samples: {X_train.shape[0]}")
    print(f"Testing samples: {X_test.shape[0]}")

    # Save the dataset
    save_dataset(X_train, X_test, y_train, y_test, label_map, output_path='dataset_sequences.pkl')
|
dataset_sequences.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:abbe955e05fb92417fc7bc27e88998dcf0c311b06bc0803d47483b9d844893e7
|
| 3 |
+
size 196612009
|
final_model_sequences.keras
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:757fd4422e6703b3a4d9730edaee6643be3e1e32fefe71520d3b2bb341e62be3
|
| 3 |
+
size 342613105
|
frame_extraction.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# frame_extraction.py
|
| 2 |
+
|
| 3 |
+
import cv2
|
| 4 |
+
import os
|
| 5 |
+
from tqdm import tqdm
|
| 6 |
+
|
| 7 |
+
def extract_frames(video_path, output_dir='frames', prefix='frame'):
    """
    Extracts frames from a video file.

    Every frame the capture yields is written to
    ``<output_dir>/<prefix>_frame_<index>.jpg``. The frame count reported by
    the container is used only to size the progress bar; the loop reads until
    the capture stops returning frames, so a missing or understated count
    does not cut extraction short.

    Args:
        video_path (str): Path to the input video file.
        output_dir (str): Directory to save extracted frames (created if needed).
        prefix (str): Prefix for the frame filenames.

    Returns:
        None. Prints an error and returns early if the video cannot be opened.
    """
    os.makedirs(output_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        cap.release()
        print(f"Error: Could not open video {video_path}.")
        return

    frame_index = 0   # position of the frame in the video; used in the filename
    frame_count = 0   # number of frames actually written to disk
    reported_total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    progress = tqdm(
        total=reported_total if reported_total > 0 else None,
        desc=f"Extracting frames from {os.path.basename(video_path)}",
    )
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_filename = os.path.join(output_dir, f"{prefix}_frame_{frame_index}.jpg")
            # cv2.imwrite reports failure through its return value.
            if cv2.imwrite(frame_filename, frame):  # Save frame as JPEG file
                frame_count += 1
            else:
                print(f"Warning: Could not write {frame_filename}.")
            frame_index += 1
            progress.update(1)
    finally:
        # Release the capture and progress bar even if reading/writing raised.
        progress.close()
        cap.release()

    print(f"Extracted {frame_count} frames from {video_path}.")
|
| 38 |
+
|
| 39 |
+
def process_all_videos(videos_dir='videos', frames_dir='frames'):
    """
    Processes all videos in the specified directory and extracts frames.

    Videos are matched by ``.avi`` / ``.mp4`` extension (case-insensitive).
    The label is the part of the filename stem before the first underscore
    (filename format ``label_something.avi``); frames go to
    ``frames_dir/<label>/`` and are prefixed with the full filename stem.

    Args:
        videos_dir (str): Directory containing video files.
        frames_dir (str): Directory to save extracted frames.
    """
    os.makedirs(frames_dir, exist_ok=True)

    video_files = sorted(
        f for f in os.listdir(videos_dir) if f.lower().endswith(('.avi', '.mp4'))
    )

    for video_file in video_files:
        # splitext keeps dots inside the name ("a.v2.mp4" -> "a.v2"), so two
        # differently named videos cannot collapse onto the same frame prefix.
        stem = os.path.splitext(video_file)[0]
        # Derive the label from the stem so a name without an underscore does
        # not yield a label that still carries the file extension.
        label = stem.split('_')[0]
        output_subdir = os.path.join(frames_dir, label)
        os.makedirs(output_subdir, exist_ok=True)
        extract_frames(os.path.join(videos_dir, video_file), output_dir=output_subdir, prefix=stem)


if __name__ == "__main__":
    process_all_videos(videos_dir='videos', frames_dir='frames')
|
history_sequences.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb39ac8c6291f9fccb70c39c7c9eab41d6c3499fe3cccdfcc88c51231fd07440
|
| 3 |
+
size 1876
|
model_building_sequences.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# model_building_sequences.py
|
| 2 |
+
|
| 3 |
+
import tensorflow as tf
|
| 4 |
+
from tensorflow.keras.models import Sequential
|
| 5 |
+
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, TimeDistributed, LSTM, BatchNormalization
|
| 6 |
+
from tensorflow.keras.optimizers import Adam
|
| 7 |
+
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
|
| 8 |
+
import pickle
|
| 9 |
+
|
| 10 |
+
def build_cnn_lstm_model(input_shape, num_classes):
    """
    Assemble and compile the CNN-LSTM sequence classifier.

    Two per-frame Conv2D/MaxPooling2D/BatchNormalization stages (wrapped in
    TimeDistributed) feed a flattened feature sequence into an LSTM, followed
    by a dense head ending in a softmax over ``num_classes``.

    Args:
        input_shape (tuple): Shape of input sequences (frames, height, width, channels).
        num_classes (int): Number of output classes.

    Returns:
        tensorflow.keras.Model: Model compiled with Adam (learning rate 1e-4),
        categorical cross-entropy loss and the accuracy metric.
    """
    layer_stack = [
        # Per-frame convolutional feature extractor, stage 1
        TimeDistributed(Conv2D(32, (3, 3), activation='relu'), input_shape=input_shape),
        TimeDistributed(MaxPooling2D((2, 2))),
        TimeDistributed(BatchNormalization()),
        # Stage 2
        TimeDistributed(Conv2D(64, (3, 3), activation='relu')),
        TimeDistributed(MaxPooling2D((2, 2))),
        TimeDistributed(BatchNormalization()),
        # One feature vector per frame
        TimeDistributed(Flatten()),
        # Temporal aggregation; only the last LSTM output is kept
        LSTM(128, return_sequences=False),
        Dropout(0.5),
        # Classification head
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model
|
| 53 |
+
|
| 54 |
+
def load_dataset_pickle(pickle_path='dataset_sequences.pkl'):
    """
    Load the pickled train/test split and label mapping.

    NOTE: ``pickle.load`` can execute arbitrary code; only open files you trust.

    Args:
        pickle_path (str): Path to the pickle file.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test, label_map)``.
    """
    with open(pickle_path, 'rb') as fh:
        bundle = pickle.load(fh)

    x_train = bundle['X_train']
    x_test = bundle['X_test']
    labels_train = bundle['y_train']
    labels_test = bundle['y_test']
    return x_train, x_test, labels_train, labels_test, bundle['label_map']
|
| 67 |
+
|
| 68 |
+
def main():
    """
    Train the CNN-LSTM model on the pickled dataset and save the results.

    Writes 'best_model_sequences.keras' (best validation accuracy, via the
    checkpoint callback), 'final_model_sequences.keras' (the model as it
    stands after training) and 'history_sequences.pkl' (the training history
    dict).
    """
    # Load the dataset (the label map is not needed for training)
    X_train, X_test, y_train, y_test, _ = load_dataset_pickle('dataset_sequences.pkl')
    num_classes = y_train.shape[1]
    input_shape = X_train.shape[1:]  # (frames, height, width, channels)

    # Build the CNN-LSTM model
    model = build_cnn_lstm_model(input_shape, num_classes)
    model.summary()

    # Define callbacks with updated filepath (.keras)
    checkpoint = ModelCheckpoint(
        'best_model_sequences.keras',  # Changed from .h5 to .keras
        monitor='val_accuracy',
        save_best_only=True,
        mode='max'
    )
    early_stop = EarlyStopping(
        monitor='val_accuracy',
        patience=10,
        restore_best_weights=True
    )

    # Prefer the first GPU, but fall back to the CPU when none is visible so
    # the script still runs on CPU-only machines.
    device = '/GPU:0' if tf.config.list_physical_devices('GPU') else '/CPU:0'

    # NOTE(review): validation_data is the held-out test split, so checkpoint
    # selection and early stopping are tuned on test data — consider a
    # separate validation split.
    with tf.device(device):
        history = model.fit(
            X_train, y_train,
            epochs=50,
            batch_size=128,  # Adjust based on your system's memory
            validation_data=(X_test, y_test),
            # The callbacks were previously created but never passed to fit(),
            # so no best-model checkpoint was written and training never
            # stopped early.
            callbacks=[checkpoint, early_stop]
        )

    # Save the final trained model with .keras extension
    model.save('final_model_sequences.keras')  # Changed from .h5 to .keras
    print("Model training completed and saved as 'final_model_sequences.keras'.")

    # Save training history for future reference
    with open('history_sequences.pkl', 'wb') as f:
        pickle.dump(history.history, f)
    print("Training history saved as 'history_sequences.pkl'.")


if __name__ == "__main__":
    main()
|
model_evaluation_sequences.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# model_evaluation_sequences.py
|
| 2 |
+
|
| 3 |
+
import tensorflow as tf
|
| 4 |
+
import pickle
|
| 5 |
+
import matplotlib.pyplot as plt
|
| 6 |
+
import numpy as np
|
| 7 |
+
import seaborn as sns
|
| 8 |
+
from sklearn.metrics import confusion_matrix, classification_report
|
| 9 |
+
|
| 10 |
+
def load_model(model_path='best_model_sequences.keras'):
    """
    Load a saved Keras model from disk.

    Args:
        model_path (str): Path to the saved model.

    Returns:
        tensorflow.keras.Model: The object returned by
        ``tf.keras.models.load_model``.
    """
    return tf.keras.models.load_model(model_path)
|
| 22 |
+
|
| 23 |
+
def load_dataset_pickle(pickle_path='dataset_sequences.pkl'):
    """
    Load the pickled train/test split and label mapping.

    NOTE: ``pickle.load`` can execute arbitrary code; only open files you trust.

    Args:
        pickle_path (str): Path to the pickle file.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test, label_map)``.
    """
    with open(pickle_path, 'rb') as source:
        contents = pickle.load(source)
    return (
        contents['X_train'],
        contents['X_test'],
        contents['y_train'],
        contents['y_test'],
        contents['label_map'],
    )
|
| 36 |
+
|
| 37 |
+
def plot_history(history):
    """
    Draw side-by-side accuracy and loss curves for training and validation.

    Args:
        history (dict): Training history. Must contain 'loss' and 'val_loss';
            accuracy is read from 'accuracy'/'val_accuracy', falling back to
            'acc'/'val_acc'.
    """
    train_acc = history.get('accuracy', history.get('acc'))
    valid_acc = history.get('val_accuracy', history.get('val_acc'))
    train_loss = history['loss']
    valid_loss = history['val_loss']

    # Epochs are numbered from 1; the length comes from the accuracy series.
    epoch_axis = range(1, len(train_acc) + 1)

    plt.figure(figsize=(14, 5))

    # (subplot position, train series, train label, valid series, valid label,
    #  title, y-axis label)
    panels = (
        (1, train_acc, 'Training accuracy', valid_acc, 'Validation accuracy',
         'Training and Validation Accuracy', 'Accuracy'),
        (2, train_loss, 'Training loss', valid_loss, 'Validation loss',
         'Training and Validation Loss', 'Loss'),
    )
    for position, train_curve, train_label, valid_curve, valid_label, title, y_label in panels:
        plt.subplot(1, 2, position)
        plt.plot(epoch_axis, train_curve, 'b', label=train_label)
        plt.plot(epoch_axis, valid_curve, 'r', label=valid_label)
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()

    plt.show()
|
| 71 |
+
|
| 72 |
+
def evaluate_model(model, X_test, y_test, label_map):
    """
    Evaluates the model on the test set.

    Prints test accuracy and loss, shows a confusion-matrix heatmap and
    prints a per-class classification report.

    Args:
        model (tensorflow.keras.Model): Trained model.
        X_test (numpy.ndarray): Test sequences.
        y_test (numpy.ndarray): One-hot test labels (argmax over axis 1 gives the class index).
        label_map (dict): Mapping from class names to indices.
    """
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test Accuracy: {accuracy * 100:.2f}%")
    print(f"Test Loss: {loss:.4f}")

    # Predictions
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true = np.argmax(y_test, axis=1)

    # Order class names by their index rather than by dict insertion order,
    # and pass the full index list explicitly: otherwise a class absent from
    # the test split shrinks the matrix/report, mislabeling the heatmap axes
    # and making classification_report reject target_names.
    ordered = sorted(label_map.items(), key=lambda item: item[1])
    class_names = [name for name, _ in ordered]
    class_ids = [index for _, index in ordered]

    # Confusion Matrix
    cm = confusion_matrix(y_true, y_pred_classes, labels=class_ids)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', xticklabels=class_names, yticklabels=class_names, cmap='Blues')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.show()

    # Classification Report
    print("Classification Report:")
    print(classification_report(
        y_true,
        y_pred_classes,
        labels=class_ids,
        target_names=class_names,
        zero_division=0,  # classes with no samples score 0 without warnings
    ))
|
| 103 |
+
|
| 104 |
+
def main():
    """
    Evaluate 'best_model_sequences.keras' on the pickled test split, then plot
    the saved training history when 'history_sequences.pkl' is available.
    """
    trained_model = load_model('best_model_sequences.keras')

    # Only the test split and the label mapping are needed for evaluation.
    _, X_test, _, y_test, label_map = load_dataset_pickle('dataset_sequences.pkl')

    evaluate_model(trained_model, X_test, y_test, label_map)

    # The history file is optional: a missing file just skips the plots.
    try:
        with open('history_sequences.pkl', 'rb') as history_file:
            saved_history = pickle.load(history_file)
        plot_history(saved_history)
    except FileNotFoundError:
        print("Training history not found. Skipping plotting.")


if __name__ == "__main__":
    main()
|
prediction_sequences.py
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# prediction_sequences.py
|
| 2 |
+
|
| 3 |
+
import tensorflow as tf
|
| 4 |
+
import cv2
|
| 5 |
+
import numpy as np
|
| 6 |
+
import dlib
|
| 7 |
+
from imutils import face_utils
|
| 8 |
+
import os
|
| 9 |
+
import pickle
|
| 10 |
+
from collections import deque
|
| 11 |
+
import threading
|
| 12 |
+
import queue
|
| 13 |
+
import time
|
| 14 |
+
|
| 15 |
+
def load_model(model_path='final_model_sequences.keras'):
    """
    Load a saved Keras model from disk.

    Args:
        model_path (str): Path to the saved model.

    Returns:
        tensorflow.keras.Model: The object returned by
        ``tf.keras.models.load_model``.
    """
    loaded = tf.keras.models.load_model(model_path)
    return loaded
|
| 27 |
+
|
| 28 |
+
def get_facial_landmarks(detector, predictor, image):
    """
    Locate the eye and eyebrow landmarks of the first face found in a frame.

    Args:
        detector: dlib face detector (called as ``detector(gray, 1)``).
        predictor: dlib shape predictor (called as ``predictor(gray, rect)``).
        image (numpy.ndarray): BGR input frame.

    Returns:
        dict | None: Landmark point arrays keyed by 'left_eye', 'right_eye',
        'left_eyebrow' and 'right_eyebrow', or None when no face is detected.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = detector(grayscale, 1)

    # Nothing to annotate when the detector finds no face.
    if len(faces) == 0:
        return None

    # Only the first detection is used; any additional faces are ignored.
    points = face_utils.shape_to_np(predictor(grayscale, faces[0]))

    # NOTE(review): the 'left'/'right' names follow the slices chosen here
    # (36-41 / 17-21 labelled "left"); confirm this matches the convention
    # used when the training data was produced.
    return {
        'left_eye': points[36:42],
        'right_eye': points[42:48],
        'left_eyebrow': points[17:22],
        'right_eyebrow': points[22:27],
    }
|
| 59 |
+
|
| 60 |
+
def extract_roi(image, landmarks, region='left_eye', padding=5):
    """
    Crop the padded bounding box of one landmark group out of an image.

    The box is the tight axis-aligned bounding box of the region's points
    (for integer points this is the same box ``cv2.boundingRect`` reports),
    grown by ``padding`` pixels on every side and then clipped to the image.
    Both edges are derived from the *unclamped* box, so a region close to the
    left/top border is not given extra width/height on the opposite side.

    Args:
        image (numpy.ndarray): Input image; rows are y, columns are x.
        landmarks (dict): Facial landmarks keyed by region name.
        region (str): Region to extract ('left_eye', 'right_eye',
            'left_eyebrow', 'right_eyebrow').
        padding (int): Padding, in pixels, added around the bounding box.

    Returns:
        numpy.ndarray | None: The cropped ROI (a view into ``image``), or
        None when the region is unknown, has no points, or lies entirely
        outside the image so that the crop would be empty.
    """
    points = landmarks.get(region)
    if points is None:
        return None

    pts = np.asarray(points).reshape(-1, 2)
    if pts.size == 0:
        return None

    x_min, y_min = (int(v) for v in pts.min(axis=0))
    x_max, y_max = (int(v) for v in pts.max(axis=0))

    img_h, img_w = image.shape[:2]

    # Clamp every edge into [0, dimension]; clamping to >= 0 matters because
    # a negative slice bound would otherwise be read as "from the end".
    x0 = min(max(x_min - padding, 0), img_w)
    y0 = min(max(y_min - padding, 0), img_h)
    x1 = min(max(x_max + 1 + padding, 0), img_w)
    y1 = min(max(y_max + 1 + padding, 0), img_h)

    # Landmarks may fall outside the frame (e.g. a partially visible face);
    # report that explicitly instead of returning an empty array that would
    # make later OpenCV calls raise.
    if x1 <= x0 or y1 <= y0:
        return None

    return image[y0:y1, x0:x1]
|
| 86 |
+
|
| 87 |
+
def preprocess_frame(image, detector, predictor, img_size=(64, 64)):
    """
    Preprocesses a single frame: detects landmarks, extracts the eye and
    eyebrow ROIs, and joins them into one model-ready image.

    Each ROI is converted to grayscale, resized to ``img_size``, scaled to
    [0, 1] as float32 and given a trailing channel axis; the four ROIs are
    then concatenated horizontally in the fixed order left eye, right eye,
    left eyebrow, right eyebrow.

    The frame is rejected (None) unless *all four* regions yield a usable
    crop. Concatenating only the regions that happened to succeed would
    produce an image narrower than the others in the sequence, which cannot
    be stacked into one array or fed to the model.

    Args:
        image (numpy.ndarray): Input BGR frame.
        detector: dlib face detector.
        predictor: dlib shape predictor.
        img_size (tuple): Size passed to ``cv2.resize`` for every ROI
            (OpenCV interprets it as (width, height)).

    Returns:
        numpy.ndarray | None: Array of shape
        (img_size[1], 4 * img_size[0], 1), or None when no face is detected
        or any region could not be cropped.
    """
    landmarks = get_facial_landmarks(detector, predictor, image)
    if landmarks is None:
        return None  # No face detected

    roi_images = []
    for region in ('left_eye', 'right_eye', 'left_eyebrow', 'right_eyebrow'):
        roi = extract_roi(image, landmarks, region)

        # An absent or zero-sized crop (landmarks outside the frame) would
        # make cvtColor/resize raise; treat the whole frame as unusable.
        if roi is None or roi.size == 0:
            return None

        roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
        roi = cv2.resize(roi, img_size)
        roi = roi.astype('float32') / 255.0  # Normalize to [0, 1]
        roi_images.append(np.expand_dims(roi, axis=-1))  # Add channel axis

    # Concatenate ROIs horizontally to form a single image
    return np.hstack(roi_images)
|
| 128 |
+
|
| 129 |
+
def movement_to_text(label_map):
    """
    Build a lookup from class index to a human-readable description.

    Class names without a dedicated description fall back to the class name
    itself.

    Args:
        label_map (dict): Mapping from class names to indices.

    Returns:
        dict: Mapping from indices to text descriptions.
    """
    descriptions = {
        'upward_eyebrow': 'Eyebrow Raised',
        'downward_eyebrow': 'Eyebrow Lowered',
        'left_eye': 'Left Eye Movement',
        'right_eye': 'Right Eye Movement',
        # Add more mappings as needed
    }

    return {
        index: descriptions.get(name, name)
        for name, index in label_map.items()
    }
|
| 153 |
+
|
| 154 |
+
def prediction_worker(model, input_queue, output_queue, max_seq_length):
    """
    Worker loop that turns queued frame sequences into predictions.

    Blocks on ``input_queue`` until a sequence arrives, fits it to exactly
    ``max_seq_length`` frames (zero-padding at the end when too short,
    truncating when too long), runs the model on it as a batch of one and
    puts ``(class_idx, confidence)`` on ``output_queue``. Receiving ``None``
    ends the loop.

    Args:
        model (tensorflow.keras.Model): Trained model; only ``predict`` is
            used.
        input_queue (queue.Queue): Queue to receive sequences for prediction
            (arrays whose first axis is the frame axis), or ``None`` to stop.
        output_queue (queue.Queue): Queue to send prediction results.
        max_seq_length (int): Fixed sequence length for the model.
    """
    while True:
        sequence = input_queue.get()
        if sequence is None:
            break  # Sentinel to stop the thread

        # Pad or truncate the sequence to match the model's expected input
        n_frames = sequence.shape[0]
        if n_frames < max_seq_length:
            padding = np.zeros(
                (max_seq_length - n_frames, *sequence.shape[1:]),
                dtype=sequence.dtype,
            )
            sequence = np.concatenate((sequence, padding), axis=0)
        else:
            sequence = sequence[:max_seq_length]

        # Add the batch axis: (1, frames, height, width, channels)
        batch = np.expand_dims(sequence, axis=0)

        # verbose=0: the default verbosity prints a progress bar for every
        # call, which floods the console when predicting in a live loop.
        prediction = model.predict(batch, verbose=0)

        # Hand plain Python scalars to the consumer rather than NumPy
        # scalars, so they behave predictably as dict keys / in formatting.
        class_idx = int(np.argmax(prediction))
        confidence = float(np.max(prediction))

        output_queue.put((class_idx, confidence))
|
| 187 |
+
|
| 188 |
+
def main():
    """
    Run real-time movement prediction on frames from the default webcam.

    Loads the trained model, the label map and dlib's face tools, then loops:
    grab a frame, preprocess it, keep the most recent ``max_seq_length``
    preprocessed frames, hand full windows to a background prediction thread
    and overlay the latest prediction on the displayed frame. Press 'q' in
    the window to quit.
    """
    img_size = (64, 64)      # ROI size handed to preprocess_frame
    regions_per_frame = 4    # two eyes + two eyebrows, joined horizontally
    max_seq_length = 20      # Adjust based on your training data

    # Load the trained model
    model = load_model('final_model_sequences.keras')

    # Load label map.
    # NOTE: pickle can execute arbitrary code while loading; only use a
    # dataset file that comes from a trusted source.
    with open('dataset_sequences.pkl', 'rb') as f:
        data = pickle.load(f)
    label_map = data['label_map']
    index_to_text = movement_to_text(label_map)

    # Initialize dlib's face detector and landmark predictor
    detector = dlib.get_frontal_face_detector()
    predictor_path = 'shape_predictor_68_face_landmarks.dat'

    if not os.path.exists(predictor_path):
        print(f"Error: {predictor_path} not found. Download it from http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2")
        return

    predictor = dlib.shape_predictor(predictor_path)

    # Open the camera before starting the worker so that an early return
    # does not leave a thread waiting on the queue.
    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print("Error: Could not open webcam.")
        cap.release()
        return

    # Queues for communication with the prediction thread
    input_queue = queue.Queue()
    output_queue = queue.Queue()

    pred_thread = threading.Thread(
        target=prediction_worker,
        args=(model, input_queue, output_queue, max_seq_length),
    )
    pred_thread.daemon = True
    pred_thread.start()

    print("Starting real-time prediction. Press 'q' to quit.")

    # Sliding window over the most recent preprocessed frames
    frame_buffer = deque(maxlen=max_seq_length)

    # Stand-in used when a frame cannot be preprocessed; it has the same
    # shape as a real preprocessed frame so the window stays stackable.
    blank_frame = np.zeros(
        (img_size[1], regions_per_frame * img_size[0], 1), dtype='float32'
    )

    # Latest prediction text shown on screen
    latest_prediction = "Initializing..."

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame.")
                break

            # Preprocess the current frame
            preprocessed_frame = preprocess_frame(
                frame, detector, predictor, img_size=img_size
            )
            if preprocessed_frame is None:
                preprocessed_frame = blank_frame
            frame_buffer.append(preprocessed_frame)

            # Submit a window only when the worker has nothing pending.
            # Enqueuing on every frame would let the queue grow without
            # bound whenever inference is slower than capture, so displayed
            # predictions would lag further and further behind.
            if len(frame_buffer) == max_seq_length and input_queue.empty():
                input_queue.put(np.array(frame_buffer))

            # Drain the results, keeping only the newest one
            try:
                while True:
                    class_idx, confidence = output_queue.get_nowait()
                    movement = index_to_text.get(class_idx, "Unknown")
                    latest_prediction = f"{movement} ({confidence*100:.2f}%)"
            except queue.Empty:
                pass  # No new prediction

            # Display the prediction on the frame
            cv2.putText(frame, latest_prediction, (30, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2,
                        cv2.LINE_AA)
            cv2.imshow('Real-time Movement Prediction', frame)

            # Exit condition
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera/window and stop the worker even if the loop
        # exits through an exception.
        cap.release()
        cv2.destroyAllWindows()
        input_queue.put(None)  # Sentinel to stop the thread
        pred_thread.join()


if __name__ == "__main__":
    main()
|
shape_predictor_68_face_landmarks.dat
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f
|
| 3 |
+
size 99693937
|
video_capture.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# video_capture.py
|
| 2 |
+
|
| 3 |
+
import cv2
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
def record_video(duration=2, output_dir='videos', filename='sample'):
    """
    Records a short video from the default webcam to ``<filename>.avi``.

    Frames are written with the XVID codec at a nominal 20 fps while being
    shown in a preview window. Recording stops after ``duration * 20``
    frames, when a frame cannot be read, or when 'q' is pressed. The camera,
    the writer and the window are released even if an error occurs while
    recording.

    Args:
        duration (int): Duration of the video in seconds.
        output_dir (str): Directory to save the videos (created if missing).
        filename (str): Name of the output video file, without extension.
    """
    os.makedirs(output_dir, exist_ok=True)

    cap = cv2.VideoCapture(0)  # Initialize webcam

    if not cap.isOpened():
        print("Error: Could not open webcam.")
        cap.release()
        return

    # Get default camera resolution
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = 20  # Frames per second

    # Define the codec and create VideoWriter object
    output_path = os.path.join(output_dir, f"{filename}.avi")
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

    # Without this check a writer that failed to open would drop every
    # frame and the function would still report success.
    if not out.isOpened():
        print(f"Error: Could not open {output_path} for writing.")
        out.release()
        cap.release()
        return

    print("Recording started. Press 'q' to stop early.")

    frame_count = 0
    total_frames = duration * fps

    try:
        while frame_count < total_frames:
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame.")
                break

            out.write(frame)  # Write frame to video file
            cv2.imshow('Recording', frame)
            frame_count += 1

            # Press 'q' to quit early
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release resources
        cap.release()
        out.release()
        cv2.destroyAllWindows()

    # Report the real location, including the output directory.
    print(f"Recording finished. Video saved as {output_path}")


if __name__ == "__main__":
    # Example: Record a 2-second video named 'movement1'
    # NOTE(review): `label` is prompted for but never used below; if videos
    # are meant to be grouped per movement label, it probably needs to feed
    # into `output_dir` - confirm against the dataset preparation script.
    label = input("Enter movement label (e.g., 'upward_eyebrow'): ")
    filename = input("Enter filename (e.g., 'movement1'): ")
    record_video(duration=2, filename=filename)
|