Spaces:
Build error
Build error
File size: 6,349 Bytes
95ab51c 8e9e1d4 95ab51c 4802794 95ab51c 9160ab9 95ab51c 56892ee 95ab51c 24ea1ba | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 | import cv2
import numpy as np
from mtcnn import MTCNN
from tensorflow.keras.models import load_model
import gradio as gr
import tempfile
import os
# The saved model is looked up in a 'saved_model' directory under the current working directory
base_dir = os.getcwd()
saved_model_dir = os.path.join(base_dir, 'saved_model')

# Both heavy objects are created once at import time and shared by every request:
# the trained CNN classifier and the MTCNN face detector
model = load_model(saved_model_dir)
detector = MTCNN()
# Making a function for fetching roi coordinates, performing classification and displaying image having detection
def classify_faces(img):
    """Detect faces in ``img``, classify each one and annotate the image in place.

    For every face reported by the module-level ``detector`` a rectangle centred
    between the two eyes is cropped, resized to 224x224, scaled to the [0, 1]
    range and passed to the module-level ``model``. A first output value of 0.5
    or more is labelled 'Sleepy', anything lower 'Awake'.

    Args:
        img: Image array indexed as (row, column, ...). It is drawn on directly;
            the callers in this file pass RGB images.

    Returns:
        A ``(img, sleepy_faces)`` tuple: the same array with annotations drawn on
        it and the number of faces labelled 'Sleepy'.
    """
    img_h, img_w = img.shape[:2]
    sleepy_faces = 0

    for face in detector.detect_faces(img):
        x, y, w, h = face['box']
        x1, y1 = face['keypoints']['left_eye'][:2]
        x2, y2 = face['keypoints']['right_eye'][:2]

        # Distance between the eyes sets the scale of the region of interest
        eye_distance = np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)

        if abs(x2 - x1) > abs(y2 - y1):
            # Eyes are mostly side by side: wide, short ROI
            roi_w = int(5 / 3 * eye_distance)
            roi_h = int(2 / 3 * eye_distance)
        else:
            # Eyes are mostly stacked (rotated face): narrow, tall ROI
            roi_w = int(2 / 3 * eye_distance)
            roi_h = int(5 / 3 * eye_distance)

        # Midpoint between the eyes is the centre of the ROI
        center_x = (x1 + x2) // 2
        center_y = (y1 + y2) // 2

        # Top-left corner of the ROI, clamped so the crop stays inside the image
        roi_x = max(0, int(center_x - roi_w / 2))
        roi_y = max(0, int(center_y - roi_h / 2))
        roi_w = min(roi_w, img_w - roi_x)
        roi_h = min(roi_h, img_h - roi_y)

        crop = img[roi_y:roi_y + roi_h, roi_x:roi_x + roi_w]
        if crop.size == 0:
            # A zero eye distance or a fully clamped ROI gives an empty crop,
            # which cv2.resize rejects; skip the face instead of crashing.
            continue

        # Preprocessing: 224x224 input and [0, 1] scaling are assumed to match
        # what the model was trained with
        crop_resized = cv2.resize(crop, (224, 224))
        crop_resized = crop_resized.astype('float32') / 255.0
        crop_resized = np.expand_dims(crop_resized, axis=0)  # add batch dimension

        prediction = model.predict(crop_resized)
        label = 'Awake' if prediction[0][0] < 0.5 else 'Sleepy'

        if label == 'Sleepy':
            sleepy_faces += 1
            # NOTE(review): indentation was lost in the source this was recovered
            # from; the box and label are drawn only for sleepy faces, following
            # the original "around drowsy face" comment. Confirm.
            cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
            # Label sits just above the bounding box
            cv2.putText(img, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

    # Overall count in the top-left corner of the image
    cv2.putText(img, f'Sleepy faces: {sleepy_faces}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    return img, sleepy_faces
def process_image(image_path):
    """Load an image from disk, annotate sleepy faces and return it in BGR order.

    Args:
        image_path: Path of the image file to read with OpenCV.

    Returns:
        A ``(image_bgr, message)`` tuple: the annotated image converted back to
        BGR, and a text summary of how many sleepy faces were counted.

    Raises:
        ValueError: If OpenCV cannot read an image from ``image_path``.
    """
    loaded = cv2.imread(image_path)
    if loaded is None:
        raise ValueError(f"Unable to load image from {image_path}")

    # OpenCV reads BGR; the classification step is given RGB
    rgb = cv2.cvtColor(loaded, cv2.COLOR_BGR2RGB)

    # Shrink so the longer side is at most max_display_size, keeping aspect ratio
    max_display_size = 800
    height, width, _ = rgb.shape
    if max(height, width) > max_display_size:
        if height > width:
            target_size = (int(width * (max_display_size / height)), max_display_size)
        else:
            target_size = (max_display_size, int(height * (max_display_size / width)))
        rgb = cv2.resize(rgb, target_size)  # cv2 expects (width, height)

    annotated, sleepy_faces = classify_faces(rgb)

    # Back to BGR so the result can be written with OpenCV
    annotated_bgr = cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR)
    return annotated_bgr, f'Sleepy faces detected: {sleepy_faces}'
def process_video(video_path):
    """Annotate every frame of a video and report the peak sleepy-face count.

    Frames are read, classified and written to the output file one at a time,
    so memory use does not grow with the length of the video.

    Args:
        video_path: Path of the video file to read with OpenCV.

    Returns:
        A ``(output_path, message)`` tuple: the path of a temporary '.mp4' file
        holding the annotated video, and a text summary with the largest number
        of sleepy faces counted in any single frame.

    Raises:
        ValueError: If OpenCV cannot open ``video_path``.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise ValueError(f"Unable to open video from {video_path}")

    out = None
    max_sleepy_faces = 0
    try:
        # Frame dimensions and FPS are taken from the input so the output matches
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the fractional rate (e.g. 29.97); truncating it changes playback speed
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # The rate is unknown (0 or NaN); fall back to a common default
            # rather than handing the writer an invalid value
            fps = 30.0

        # Only the file name is needed; close the handle right away so the
        # writer can open the path itself and no descriptor is leaked
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
            output_path = temp_file.name
        out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Classification is given RGB; OpenCV reads and writes BGR
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_with_boxes, sleepy_faces = classify_faces(frame_rgb)
            out.write(cv2.cvtColor(frame_with_boxes, cv2.COLOR_RGB2BGR))
            # Track the highest per-frame count seen so far
            max_sleepy_faces = max(max_sleepy_faces, sleepy_faces)
    finally:
        # Release both ends even when a frame fails to process
        cap.release()
        if out is not None:
            out.release()

    return output_path, f'The maximum number of sleepy faces detected in the entire video is: {max_sleepy_faces}'
def image_interface(image):
    """Gradio callback: run sleepy-face detection on an uploaded image.

    The image is saved to a temporary '.jpg' file so that ``process_image``
    can read it by path; the annotated result is written to a second
    temporary '.jpg' file whose path is returned.

    Args:
        image: Object with a ``save(path)`` method; the interface in this file
            is configured to pass a PIL image.

    Returns:
        A ``(output_path, detection_info)`` tuple: the path of the annotated
        image file and the text summary produced by ``process_image``.
    """
    # Only the names are needed; closing the handles right away avoids leaking
    # descriptors and lets the paths be reopened on platforms that forbid
    # opening a file that is still held open
    with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_input:
        input_path = temp_input.name
    try:
        image.save(input_path)
        result_image, detection_info = process_image(input_path)
    finally:
        # The input copy is an intermediate artefact; do not let it pile up
        os.remove(input_path)

    with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_output:
        output_path = temp_output.name
    cv2.imwrite(output_path, result_image)
    return output_path, detection_info
def video_interface(video_path):
    """Gradio callback: run sleepy-face detection on an uploaded video.

    Args:
        video_path: Path of the uploaded video, forwarded to ``process_video``.

    Returns:
        The ``(result_video, detection_info)`` pair produced by ``process_video``.
    """
    outcome = process_video(video_path)
    annotated_video, summary = outcome
    return annotated_video, summary
# One interface per media type; each returns the annotated media plus a text summary
image_container = gr.Interface(
    fn=image_interface,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Image(), gr.Text()],
)
video_container = gr.Interface(
    fn=video_interface,
    inputs=gr.Video(),
    outputs=[gr.Video(), gr.Text()],
)

# Page layout: title, author line, then the two interfaces as tabs
with gr.Blocks() as container:
    gr.Markdown("# Sleep Detection")
    gr.Markdown("### Made by Joy Biswas")
    gr.TabbedInterface(
        [image_container, video_container],
        ["Image Detection", "Video Detection"],
    )

container.launch()
|