File size: 6,349 Bytes
95ab51c
 
 
 
 
 
 
 
8e9e1d4
 
 
 
 
 
95ab51c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4802794
95ab51c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9160ab9
 
95ab51c
 
 
 
56892ee
95ab51c
24ea1ba
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import cv2
import numpy as np
from mtcnn import MTCNN
from tensorflow.keras.models import load_model
import gradio as gr
import tempfile
import os

# Model artifacts are expected in ./saved_model relative to the working
# directory the script is launched from.
base_dir = os.getcwd()
saved_model_dir = os.path.join(base_dir, 'saved_model')

# Loading the trained CNN model (binary awake/sleepy classifier;
# presumably expects 224x224 RGB input — see classify_faces below).
model = load_model(saved_model_dir)


# Initializing the MTCNN face detector (provides face boxes + eye keypoints)
detector = MTCNN()


# Making a function for fetching roi coordinates, performing classification and displaying image having detection
def classify_faces(img):
    faces = detector.detect_faces(img)
    sleepy_faces = 0

    for face in faces:
        x, y, w, h = face['box']
        x1 = face['keypoints']['left_eye'][0]
        y1 = face['keypoints']['left_eye'][1]
        x2 = face['keypoints']['right_eye'][0]
        y2 = face['keypoints']['right_eye'][1]

        # Calculating the distance between the eyes
        eye_distance = np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)

        if abs(x2 - x1) > abs(y2 - y1):
            # For larger horizontal distances between eyes
            roi_w = int(5 / 3 * eye_distance)
            roi_h = int(2 / 3 * eye_distance)
        else:
            # For larger vertical distances between eyes
            roi_w = int(2 / 3 * eye_distance)
            roi_h = int(5 / 3 * eye_distance)

        # Calculating the center point between the eyes
        center_x = (x1 + x2) // 2
        center_y = (y1 + y2) // 2

        # Adjusting ROI coordinates to keep the center point between the eyes (It essentially grabs the top left
        # coordinate of the roi box)
        roi_x = int(center_x - roi_w / 2)
        roi_y = int(center_y - roi_h / 2)

        # Ensuring the ROI is within image boundaries
        roi_x = max(0, roi_x)
        roi_y = max(0, roi_y)
        roi_w = min(roi_w, img.shape[1] - roi_x)
        roi_h = min(roi_h, img.shape[0] - roi_y)

        crop = img[roi_y:roi_y + roi_h, roi_x:roi_x + roi_w]

        # Preprocessing the cropped face image as required by your model
        crop_resized = cv2.resize(crop, (224, 224))  # Assuming your model expects 224x224 input
        crop_resized = crop_resized.astype('float32') / 255.0  # Normalize if required
        crop_resized = np.expand_dims(crop_resized, axis=0)  # Add batch dimension

        prediction = model.predict(crop_resized)
        label = 'Awake' if prediction[0][0] < 0.5 else 'Sleepy'

        if label == 'Sleepy':
            sleepy_faces += 1
            # Drawing bounding box around drowsy face
            cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
            # Putting text label above the bounding box
            cv2.putText(img, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

    # Displaying the count of sleepy faces detected
    cv2.putText(img, f'Sleepy faces: {sleepy_faces}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    return img, sleepy_faces


def process_image(image_path):
    """Load an image from disk, run sleepy-face detection on it, and return
    the annotated image plus a human-readable summary.

    Parameters
    ----------
    image_path : str
        Path to the image file to analyze.

    Returns
    -------
    tuple
        (annotated BGR image, summary string with the sleepy-face count)

    Raises
    ------
    ValueError
        If the file cannot be decoded as an image.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Unable to load image from {image_path}")

    # The detector works on RGB; OpenCV decodes as BGR.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Shrink so the longest side fits within the display budget,
    # preserving the aspect ratio.
    max_display_size = 800  # Maximum width or height for displaying the image
    height, width = img_rgb.shape[:2]
    if max(height, width) > max_display_size:
        if height > width:
            new_height, new_width = max_display_size, int(width * (max_display_size / height))
        else:
            new_width, new_height = max_display_size, int(height * (max_display_size / width))
        img_rgb = cv2.resize(img_rgb, (new_width, new_height))

    # Detect and annotate sleepy faces on the resized RGB image.
    annotated_rgb, sleepy_faces = classify_faces(img_rgb)

    # Back to BGR so OpenCV writes correct colors.
    annotated_bgr = cv2.cvtColor(annotated_rgb, cv2.COLOR_RGB2BGR)

    return annotated_bgr, f'Sleepy faces detected: {sleepy_faces}'


def process_video(video_path):
    """Run sleepy-face detection on every frame of a video.

    Parameters
    ----------
    video_path : str
        Path to the input video file.

    Returns
    -------
    tuple
        (path to the annotated .mp4 output, summary string with the maximum
        sleepy-face count observed in any single frame)
    """
    cap = cv2.VideoCapture(video_path)
    max_sleepy_faces = 0

    # Obtaining frame dimensions and FPS from the video capture
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # BUGFIX: some containers report 0 FPS; fall back to a sane default so
    # the VideoWriter still produces a playable file.
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30

    # Open the writer up front and stream frames through it. The original
    # buffered every annotated frame in a list before writing, which costs
    # O(video size) memory for no benefit.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
    temp_file.close()  # close our handle; cv2.VideoWriter reopens the path
    out = cv2.VideoWriter(temp_file.name, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Detector/model expect RGB; OpenCV decodes frames as BGR.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            frame_with_boxes, sleepy_faces = classify_faces(frame_rgb)

            # Track the worst (highest) per-frame sleepy count.
            max_sleepy_faces = max(max_sleepy_faces, sleepy_faces)

            # Convert back to BGR before writing.
            out.write(cv2.cvtColor(frame_with_boxes, cv2.COLOR_RGB2BGR))
    finally:
        # Release capture and writer even if classification raises mid-video.
        cap.release()
        out.release()

    return temp_file.name, f'The maximum number of sleepy faces detected in the entire video is: {max_sleepy_faces}'


def image_interface(image):
    """Gradio adapter for still images.

    Saves the incoming PIL image to a temporary file, runs detection, writes
    the annotated result to another temporary file, and returns its path
    along with the detection summary text.
    """
    source_tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.jpg')
    image.save(source_tmp.name)

    annotated, detection_info = process_image(source_tmp.name)

    result_tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.jpg')
    cv2.imwrite(result_tmp.name, annotated)

    return result_tmp.name, detection_info


def video_interface(video_path):
    """Gradio adapter for videos: delegate straight to process_video,
    which already returns (output video path, summary text)."""
    return process_video(video_path)

# One Gradio Interface per media type; each returns (media, text) outputs.
image_container = gr.Interface(fn=image_interface, inputs=gr.Image(type="pil"), outputs=[gr.Image(), gr.Text()])
video_container = gr.Interface(fn=video_interface, inputs=gr.Video(), outputs=[gr.Video(), gr.Text()])

# Compose both interfaces into a tabbed app with a title header.
with gr.Blocks() as container:
    gr.Markdown("# Sleep Detection")
    gr.Markdown("### Made by Joy Biswas")
    gr.TabbedInterface([image_container, video_container], ["Image Detection", "Video Detection"])

# Start the Gradio server (blocks until the app is stopped).
container.launch()