File size: 5,633 Bytes
8ff5f44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import cv2
import math
import random
import numpy as np
import mediapipe as mp

from scipy.spatial.distance import euclidean as dist
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# feature definitions
# Landmark index pairs below reference the MediaPipe FaceMesh topology.
# Each of RIGHT / LEFT / MOUTH is [width pair, vertical pair 1, vertical pair 2, vertical pair 3].


DIMS = (224,224,3) # dimensions of the image (presumably H, W, C model input -- not used in this chunk)
RIGHT = [[33, 133], [160, 144], [159, 145], [158, 153]] # right eye landmark positions (index pairs into FaceMesh landmarks)
LEFT = [[263, 362], [387, 373], [386, 374], [385, 380]] # left eye landmark positions (index pairs into FaceMesh landmarks)
MOUTH = [[61, 291], [39, 181], [0, 17], [269, 405]] # mouth landmark coordinates (index pairs into FaceMesh landmarks)

EYE_AR_THRESH = 0.45  # eye feature (sum of both EARs) at or below this counts toward "drowsy"
PROB_THRESH = 0.3  # probability threshold -- not referenced in this chunk; presumably for a downstream classifier
EYE_AR_CONSEC_FRAMES = 15  # consecutive low-EAR frames required -- not referenced in this chunk

MOUTH_AR_THRESH = 0.33  # mouth aspect ratio above this counts toward "drowsy" (yawn)
MOUTH_AR_CONSEC_FRAMES = 20  # consecutive yawn frames required -- not referenced in this chunk

# MediaPipe solution handles and drawing configuration
MP_FACE_DETECTION = mp.solutions.face_detection
MP_DRAWING = mp.solutions.drawing_utils
MP_DRAWING_STYLES = mp.solutions.drawing_styles
MP_FACE_MESH = mp.solutions.face_mesh
DRAWING_SPEC = MP_DRAWING.DrawingSpec(thickness=1, circle_radius=1)

def get_ear(landmarks,eye):
    ''' Eye aspect ratio: mean of the three vertical lid distances divided
    by the horizontal eye width.
    :param landmarks: Face Landmarks returned from FaceMesh MediaPipe model
    :param eye: List of landmark index pairs; eye[0] is the horizontal
        width pair, eye[1:] are the three vertical pairs
    :return: Eye aspect ratio value
    '''
    # sum of the three vertical (lid-to-lid) distances
    vertical = sum(dist(landmarks[a], landmarks[b]) for a, b in eye[1:])
    # horizontal corner-to-corner eye width
    width = dist(landmarks[eye[0][0]], landmarks[eye[0][1]])
    return vertical / (3 * width)

def get_eye_feature(landmarks):
    ''' Combined eye feature: the SUM of the left and right eye aspect
    ratios (note: not halved, so this is twice the mean EAR -- the
    EYE_AR_THRESH constant is tuned to this sum).
    :param landmarks: Face Landmarks returned from FaceMesh MediaPipe model
    :return: Eye feature value
    '''
    right_ear = get_ear(landmarks, RIGHT)
    left_ear = get_ear(landmarks, LEFT)
    return right_ear + left_ear

def get_mouth_feature(landmarks):
    ''' Mouth aspect ratio: mean of the three vertical lip distances
    divided by the horizontal mouth width.
    :param landmarks: Face Landmarks returned from FaceMesh MediaPipe model
    :return: Mouth feature value
    '''
    # MOUTH[1:] are the vertical (lip-to-lip) pairs
    vertical = sum(dist(landmarks[a], landmarks[b]) for a, b in MOUTH[1:])
    # MOUTH[0] is the horizontal corner-to-corner pair
    width = dist(landmarks[MOUTH[0][0]], landmarks[MOUTH[0][1]])
    return vertical / (3 * width)

# image processing


def process_mp_img(frame):
    """
    Run MediaPipe FaceMesh on a single BGR frame and extract drowsiness
    features.

    :param frame: BGR image (OpenCV-style numpy array).
    :return: tuple (annotated_img, eye_feature, mouth_feature, drowsy).
        When no face is detected, the input frame is returned unchanged
        with eye/mouth features of 0.0 and drowsy=False.  (The original
        code left all four names unbound in that case and crashed with
        UnboundLocalError at the return.)
    """
    # Safe defaults for the no-face path -- previously these names were
    # only bound inside the detection branch, so a face-less frame raised
    # UnboundLocalError on return.
    annotated_img = frame
    eye_feature = 0.0
    mouth_feature = 0.0
    drowsy = False

    with MP_FACE_MESH.FaceMesh(
        min_detection_confidence=0.3,
        min_tracking_confidence=0.8) as face_mesh:
        # MediaPipe expects RGB input; OpenCV frames are BGR
        results = face_mesh.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        if results.multi_face_landmarks is not None:
            # (N, 3) array of normalized landmark coordinates for the
            # first (primary) detected face
            landmark_pos = np.array(
                [[lm.x, lm.y, lm.z]
                 for lm in results.multi_face_landmarks[0].landmark])

            # Features depend only on the primary face, so compute them
            # once instead of once per drawn face (as the original did).
            eye_feature = get_eye_feature(landmark_pos)
            mouth_feature = get_mouth_feature(landmark_pos)
            # Binary classification: drowsy when eyes look closed OR the
            # mouth looks open (yawn)
            drowsy = (eye_feature <= EYE_AR_THRESH) or (mouth_feature > MOUTH_AR_THRESH)

            # draw face detections of each face
            annotated_img = frame.copy()
            for face_landmarks in results.multi_face_landmarks:
                # face mesh
                MP_DRAWING.draw_landmarks(
                    image=annotated_img,
                    landmark_list=face_landmarks,
                    connections=MP_FACE_MESH.FACEMESH_TESSELATION,
                    landmark_drawing_spec=None,
                    connection_drawing_spec=MP_DRAWING_STYLES
                    .get_default_face_mesh_tesselation_style()
                )
                # eyes and mouth regions
                MP_DRAWING.draw_landmarks(
                    image=annotated_img,
                    landmark_list=face_landmarks,
                    connections=MP_FACE_MESH.FACEMESH_CONTOURS,
                    landmark_drawing_spec=None,
                    connection_drawing_spec=MP_DRAWING_STYLES
                    .get_default_face_mesh_contours_style()
                )
    return annotated_img, eye_feature, mouth_feature, drowsy



def mediapipe_process(frames):
    """
    Process all frames with MediaPipe and return a dictionary of z-score
    normalized eye and mouth features in the format
    {frame: {"eye_feature": 0, "mouth_feature": 0, "drowsy": 0}}.

    :param frames: iterable of frames passed to ``process_mp_img``.
        NOTE(review): frames are used as dict keys, which requires them
        to be hashable -- raw numpy arrays are not; verify against caller.
    :return: dict mapping each frame to its normalized feature dict.
        Empty input yields an empty dict (previously np.mean/np.std ran
        on empty lists, producing NaN warnings).
    """
    mp_features = {}
    eye_features_all = []
    mouth_features_all = []
    # Extract eye and mouth features for every frame
    for frame in frames:
        _, eye_feature, mouth_feature, drowsy = process_mp_img(frame)
        mp_features[frame] = {
            "eye_feature": eye_feature,
            "mouth_feature": mouth_feature,
            "drowsy": drowsy,
        }
        eye_features_all.append(eye_feature)
        mouth_features_all.append(mouth_feature)

    if not mp_features:
        return mp_features  # nothing to normalize

    # Mean and standard deviation for z-score normalization
    eye_mean, eye_std = np.mean(eye_features_all), np.std(eye_features_all)
    mouth_mean, mouth_std = np.mean(mouth_features_all), np.std(mouth_features_all)
    # Guard against division by zero when all features are identical
    eye_std = eye_std or 1.0
    mouth_std = mouth_std or 1.0

    # Normalize eye and mouth features for every frame.
    # BUG FIX: the original wrote features[frame]["mouth_feature"], which
    # indexed the inner per-frame dict with the frame key and raised
    # KeyError/TypeError on the first iteration.
    for features in mp_features.values():
        features["eye_feature"] = (features["eye_feature"] - eye_mean) / eye_std
        features["mouth_feature"] = (features["mouth_feature"] - mouth_mean) / mouth_std

    return mp_features