Spaces:
Build error
Build error
| import cv2 | |
| import numpy as np | |
| from mtcnn import MTCNN | |
| from tensorflow.keras.models import load_model | |
| import gradio as gr | |
| import tempfile | |
| import os | |
# The saved model is looked up relative to the process working directory,
# so the app must be started from the directory that contains 'saved_model'.
base_dir = os.getcwd()
saved_model_dir = os.path.join(base_dir, 'saved_model')

# Both the classifier and the face detector are created once at import time
# and shared by every request.
model = load_model(saved_model_dir)  # trained CNN classifier
detector = MTCNN()  # MTCNN face detector
def _eye_roi(left_eye, right_eye, img_shape):
    """Return the eye-region box ``(x, y, w, h)`` for one face, clamped to the image.

    The box is centred between the two eye keypoints and sized from the
    distance between them: 5/3 of the distance along the dominant eye axis and
    2/3 along the other. The returned width or height may be zero or negative
    when the eyes are (nearly) coincident or the box falls outside the image;
    callers must check before cropping.
    """
    x1, y1 = left_eye[0], left_eye[1]
    x2, y2 = right_eye[0], right_eye[1]

    eye_distance = np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
    if abs(x2 - x1) > abs(y2 - y1):
        # Eyes are mostly side by side: wide, short region.
        roi_w = int(5 / 3 * eye_distance)
        roi_h = int(2 / 3 * eye_distance)
    else:
        # Eyes are mostly stacked (rotated face): narrow, tall region.
        roi_w = int(2 / 3 * eye_distance)
        roi_h = int(5 / 3 * eye_distance)

    # Top-left corner such that the box is centred between the eyes.
    center_x = (x1 + x2) // 2
    center_y = (y1 + y2) // 2
    roi_x = max(0, int(center_x - roi_w / 2))
    roi_y = max(0, int(center_y - roi_h / 2))

    # Clip the extent so the box does not run past the right/bottom edge.
    roi_w = min(roi_w, img_shape[1] - roi_x)
    roi_h = min(roi_h, img_shape[0] - roi_y)
    return roi_x, roi_y, roi_w, roi_h


def classify_faces(img):
    """Detect faces, classify each eye region and annotate ``img`` in place.

    Args:
        img: Image array handed to the MTCNN detector and drawn on with
            OpenCV. The callers in this file pass RGB images.

    Returns:
        A ``(img, sleepy_faces)`` tuple: the same array object, now annotated,
        and the number of faces classified as 'Sleepy'.
    """
    sleepy_faces = 0
    for face in detector.detect_faces(img):
        x, y, w, h = face['box']
        keypoints = face['keypoints']
        roi_x, roi_y, roi_w, roi_h = _eye_roi(
            keypoints['left_eye'], keypoints['right_eye'], img.shape
        )

        # A degenerate region would make cv2.resize raise on an empty array,
        # so such a face is skipped instead of aborting the whole image.
        if roi_w <= 0 or roi_h <= 0:
            continue
        crop = img[roi_y:roi_y + roi_h, roi_x:roi_x + roi_w]

        # Preprocessing: assumes the model expects a 224x224 input scaled to
        # [0, 1] -- TODO confirm against the training pipeline.
        batch = cv2.resize(crop, (224, 224)).astype('float32') / 255.0
        batch = np.expand_dims(batch, axis=0)  # add batch dimension

        prediction = model.predict(batch)
        label = 'Awake' if prediction[0][0] < 0.5 else 'Sleepy'

        # NOTE(review): annotation is applied to sleepy faces only, following
        # the original "around drowsy face" comment -- confirm this intent.
        if label == 'Sleepy':
            sleepy_faces += 1
            # Bounding box around the drowsy face.
            cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
            # Text label just above the bounding box.
            cv2.putText(img, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

    # Overall count in the top-left corner of the image.
    cv2.putText(img, f'Sleepy faces: {sleepy_faces}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    return img, sleepy_faces
def process_image(image_path, max_display_size=800):
    """Load an image, classify the faces in it and return the annotated result.

    Args:
        image_path: Path of the image file to read with OpenCV.
        max_display_size: Maximum width or height, in pixels, of the image
            that is classified and returned. Larger images are scaled down
            with their aspect ratio preserved.

    Returns:
        A ``(image_bgr, message)`` tuple: the annotated image in BGR channel
        order (ready for ``cv2.imwrite``) and a summary string.

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Unable to load image from {image_path}")

    # classify_faces is fed RGB; OpenCV loads BGR.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    height, width, _ = img_rgb.shape
    if max(height, width) > max_display_size:
        # The max(1, ...) keeps the short side from truncating to 0 on very
        # elongated images, which would make cv2.resize fail.
        if height > width:
            new_height = max_display_size
            new_width = max(1, int(width * (max_display_size / height)))
        else:
            new_width = max_display_size
            new_height = max(1, int(height * (max_display_size / width)))
        img_rgb = cv2.resize(img_rgb, (new_width, new_height))

    img_with_boxes, sleepy_faces = classify_faces(img_rgb)

    # Back to BGR so the caller can save it with OpenCV.
    img_with_boxes_bgr = cv2.cvtColor(img_with_boxes, cv2.COLOR_RGB2BGR)
    return img_with_boxes_bgr, f'Sleepy faces detected: {sleepy_faces}'
def process_video(video_path):
    """Classify the faces in every frame of a video and write an annotated copy.

    Frames are written to the output as soon as they are processed, so memory
    use does not grow with the length of the video.

    Args:
        video_path: Path of the video file to read with OpenCV.

    Returns:
        A ``(output_path, message)`` tuple: the path of a temporary ``.mp4``
        file holding the annotated video (not deleted by this function) and a
        summary string with the highest per-frame sleepy-face count.

    Raises:
        ValueError: If the video cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise ValueError(f"Unable to open video from {video_path}")

    out = None
    max_sleepy_faces = 0
    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Keep the fractional rate (e.g. 29.97) so the output duration does
        # not drift; fall back to 30 when the container reports no usable rate.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        # Only the unique name is needed; the handle is closed right away so
        # the writer can open the path itself.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
            output_path = temp_file.name
        out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # classify_faces is fed RGB; the writer expects BGR again.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_with_boxes, sleepy_faces = classify_faces(frame_rgb)
            out.write(cv2.cvtColor(frame_with_boxes, cv2.COLOR_RGB2BGR))
            max_sleepy_faces = max(max_sleepy_faces, sleepy_faces)
    finally:
        # Release both ends even if a frame fails to process.
        cap.release()
        if out is not None:
            out.release()

    return output_path, f'The maximum number of sleepy faces detected in the entire video is: {max_sleepy_faces}'
def image_interface(image):
    """Gradio callback for the image tab.

    Args:
        image: The uploaded picture as a PIL image (the input component is
            declared with ``type="pil"``).

    Returns:
        A ``(output_path, message)`` tuple: the path of a temporary ``.jpg``
        holding the annotated image and the detection summary string.

    Raises:
        ValueError: If no image was supplied, or if the saved image cannot be
            read back by ``process_image``.
    """
    if image is None:
        raise ValueError("No image provided")

    # Only the unique file names are needed; the handles are closed right away
    # so that other libraries can open the paths themselves.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_input:
        input_path = temp_input.name
    try:
        # JPEG cannot store alpha or palette modes, so normalise to RGB first.
        image.convert('RGB').save(input_path)
        result_image, detection_info = process_image(input_path)
    finally:
        # The input copy is only an intermediate; do not leave it behind.
        os.remove(input_path)

    with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_output:
        output_path = temp_output.name
    cv2.imwrite(output_path, result_image)
    return output_path, detection_info
def video_interface(video_path):
    """Gradio callback for the video tab.

    Hands the uploaded video's path to ``process_video`` and returns its
    ``(result_video, detection_info)`` pair unchanged.
    """
    return process_video(video_path)
# One interface per media type; each returns the annotated media plus a text summary.
image_container = gr.Interface(
    fn=image_interface,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Image(), gr.Text()],
)
video_container = gr.Interface(
    fn=video_interface,
    inputs=gr.Video(),
    outputs=[gr.Video(), gr.Text()],
)

# Page layout: a title, a credit line, and the two interfaces as tabs.
with gr.Blocks() as container:
    gr.Markdown("# Sleep Detection")
    gr.Markdown("### Made by Joy Biswas")
    gr.TabbedInterface(
        [image_container, video_container],
        ["Image Detection", "Video Detection"],
    )

# Start the web server for the app.
container.launch()