File size: 5,633 Bytes
8ff5f44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import cv2
import math
import random
import numpy as np
import mediapipe as mp

from scipy.spatial.distance import euclidean as dist
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# feature definitions
# Landmark index pairs below reference the MediaPipe FaceMesh topology.
# Each of RIGHT / LEFT / MOUTH is [width pair, vertical pair 1, vertical pair 2, vertical pair 3].


DIMS = (224,224,3) # dimensions of the image (presumably H, W, C model input -- not used in this chunk)
RIGHT = [[33, 133], [160, 144], [159, 145], [158, 153]] # right eye landmark positions (index pairs into FaceMesh landmarks)
LEFT = [[263, 362], [387, 373], [386, 374], [385, 380]] # left eye landmark positions (index pairs into FaceMesh landmarks)
MOUTH = [[61, 291], [39, 181], [0, 17], [269, 405]] # mouth landmark coordinates (index pairs into FaceMesh landmarks)

EYE_AR_THRESH = 0.45  # eye feature (sum of both EARs) at or below this counts toward "drowsy"
PROB_THRESH = 0.3  # probability threshold -- not referenced in this chunk; presumably for a downstream classifier
EYE_AR_CONSEC_FRAMES = 15  # consecutive low-EAR frames required -- not referenced in this chunk

MOUTH_AR_THRESH = 0.33  # mouth aspect ratio above this counts toward "drowsy" (yawn)
MOUTH_AR_CONSEC_FRAMES = 20  # consecutive yawn frames required -- not referenced in this chunk

# MediaPipe solution handles and drawing configuration
MP_FACE_DETECTION = mp.solutions.face_detection
MP_DRAWING = mp.solutions.drawing_utils
MP_DRAWING_STYLES = mp.solutions.drawing_styles
MP_FACE_MESH = mp.solutions.face_mesh
DRAWING_SPEC = MP_DRAWING.DrawingSpec(thickness=1, circle_radius=1)

def get_ear(landmarks,eye):
    ''' Eye aspect ratio: mean of the three vertical lid distances divided
    by the horizontal eye width.
    :param landmarks: Face Landmarks returned from FaceMesh MediaPipe model
    :param eye: List of landmark index pairs; eye[0] is the horizontal
        width pair, eye[1:] are the three vertical pairs
    :return: Eye aspect ratio value
    '''
    # sum of the three vertical (lid-to-lid) distances
    vertical = sum(dist(landmarks[a], landmarks[b]) for a, b in eye[1:])
    # horizontal corner-to-corner eye width
    width = dist(landmarks[eye[0][0]], landmarks[eye[0][1]])
    return vertical / (3 * width)

def get_eye_feature(landmarks):
    ''' Combined eye feature: the SUM of the left and right eye aspect
    ratios (note: not halved, so this is twice the mean EAR -- the
    EYE_AR_THRESH constant is tuned to this sum).
    :param landmarks: Face Landmarks returned from FaceMesh MediaPipe model
    :return: Eye feature value
    '''
    right_ear = get_ear(landmarks, RIGHT)
    left_ear = get_ear(landmarks, LEFT)
    return right_ear + left_ear

def get_mouth_feature(landmarks):
    ''' Mouth aspect ratio: mean of the three vertical lip distances
    divided by the horizontal mouth width.
    :param landmarks: Face Landmarks returned from FaceMesh MediaPipe model
    :return: Mouth feature value
    '''
    # MOUTH[1:] are the vertical (lip-to-lip) pairs
    vertical = sum(dist(landmarks[a], landmarks[b]) for a, b in MOUTH[1:])
    # MOUTH[0] is the horizontal corner-to-corner pair
    width = dist(landmarks[MOUTH[0][0]], landmarks[MOUTH[0][1]])
    return vertical / (3 * width)

# image processing


def process_mp_img(frame):
    """
    Run MediaPipe FaceMesh on a single BGR frame and extract drowsiness
    features.

    :param frame: BGR image (OpenCV-style numpy array).
    :return: tuple (annotated_img, eye_feature, mouth_feature, drowsy).
        When no face is detected, the input frame is returned unchanged
        with eye/mouth features of 0.0 and drowsy=False.  (The original
        code left all four names unbound in that case and crashed with
        UnboundLocalError at the return.)
    """
    # Safe defaults for the no-face path -- previously these names were
    # only bound inside the detection branch, so a face-less frame raised
    # UnboundLocalError on return.
    annotated_img = frame
    eye_feature = 0.0
    mouth_feature = 0.0
    drowsy = False

    with MP_FACE_MESH.FaceMesh(
        min_detection_confidence=0.3,
        min_tracking_confidence=0.8) as face_mesh:
        # MediaPipe expects RGB input; OpenCV frames are BGR
        results = face_mesh.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        if results.multi_face_landmarks is not None:
            # (N, 3) array of normalized landmark coordinates for the
            # first (primary) detected face
            landmark_pos = np.array(
                [[lm.x, lm.y, lm.z]
                 for lm in results.multi_face_landmarks[0].landmark])

            # Features depend only on the primary face, so compute them
            # once instead of once per drawn face (as the original did).
            eye_feature = get_eye_feature(landmark_pos)
            mouth_feature = get_mouth_feature(landmark_pos)
            # Binary classification: drowsy when eyes look closed OR the
            # mouth looks open (yawn)
            drowsy = (eye_feature <= EYE_AR_THRESH) or (mouth_feature > MOUTH_AR_THRESH)

            # draw face detections of each face
            annotated_img = frame.copy()
            for face_landmarks in results.multi_face_landmarks:
                # face mesh
                MP_DRAWING.draw_landmarks(
                    image=annotated_img,
                    landmark_list=face_landmarks,
                    connections=MP_FACE_MESH.FACEMESH_TESSELATION,
                    landmark_drawing_spec=None,
                    connection_drawing_spec=MP_DRAWING_STYLES
                    .get_default_face_mesh_tesselation_style()
                )
                # eyes and mouth regions
                MP_DRAWING.draw_landmarks(
                    image=annotated_img,
                    landmark_list=face_landmarks,
                    connections=MP_FACE_MESH.FACEMESH_CONTOURS,
                    landmark_drawing_spec=None,
                    connection_drawing_spec=MP_DRAWING_STYLES
                    .get_default_face_mesh_contours_style()
                )
    return annotated_img, eye_feature, mouth_feature, drowsy



def mediapipe_process(frames):
    """
    Process all frames with MediaPipe and return a dictionary of z-score
    normalized eye and mouth features in the format
    {frame: {"eye_feature": 0, "mouth_feature": 0, "drowsy": 0}}.

    :param frames: iterable of frames passed to ``process_mp_img``.
        NOTE(review): frames are used as dict keys, which requires them
        to be hashable -- raw numpy arrays are not; verify against caller.
    :return: dict mapping each frame to its normalized feature dict.
        Empty input yields an empty dict (previously np.mean/np.std ran
        on empty lists, producing NaN warnings).
    """
    mp_features = {}
    eye_features_all = []
    mouth_features_all = []
    # Extract eye and mouth features for every frame
    for frame in frames:
        _, eye_feature, mouth_feature, drowsy = process_mp_img(frame)
        mp_features[frame] = {
            "eye_feature": eye_feature,
            "mouth_feature": mouth_feature,
            "drowsy": drowsy,
        }
        eye_features_all.append(eye_feature)
        mouth_features_all.append(mouth_feature)

    if not mp_features:
        return mp_features  # nothing to normalize

    # Mean and standard deviation for z-score normalization
    eye_mean, eye_std = np.mean(eye_features_all), np.std(eye_features_all)
    mouth_mean, mouth_std = np.mean(mouth_features_all), np.std(mouth_features_all)
    # Guard against division by zero when all features are identical
    eye_std = eye_std or 1.0
    mouth_std = mouth_std or 1.0

    # Normalize eye and mouth features for every frame.
    # BUG FIX: the original wrote features[frame]["mouth_feature"], which
    # indexed the inner per-frame dict with the frame key and raised
    # KeyError/TypeError on the first iteration.
    for features in mp_features.values():
        features["eye_feature"] = (features["eye_feature"] - eye_mean) / eye_std
        features["mouth_feature"] = (features["mouth_feature"] - mouth_mean) / mouth_std

    return mp_features