File size: 6,349 Bytes
95ab51c
 
 
 
 
 
 
 
8e9e1d4
 
 
 
 
 
95ab51c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4802794
95ab51c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9160ab9
 
95ab51c
 
 
 
56892ee
95ab51c
24ea1ba
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import cv2
import numpy as np
from mtcnn import MTCNN
from tensorflow.keras.models import load_model
import gradio as gr
import tempfile
import os

# Model artifacts are expected in ./saved_model relative to the working
# directory the script is launched from.
base_dir = os.getcwd()
saved_model_dir = os.path.join(base_dir, 'saved_model')

# Loading the trained CNN model (binary awake/sleepy classifier;
# presumably expects 224x224 RGB input — see classify_faces below).
model = load_model(saved_model_dir)


# Initializing the MTCNN face detector (provides face boxes + eye keypoints)
detector = MTCNN()


# Making a function for fetching roi coordinates, performing classification and displaying image having detection
def classify_faces(img):
    faces = detector.detect_faces(img)
    sleepy_faces = 0

    for face in faces:
        x, y, w, h = face['box']
        x1 = face['keypoints']['left_eye'][0]
        y1 = face['keypoints']['left_eye'][1]
        x2 = face['keypoints']['right_eye'][0]
        y2 = face['keypoints']['right_eye'][1]

        # Calculating the distance between the eyes
        eye_distance = np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)

        if abs(x2 - x1) > abs(y2 - y1):
            # For larger horizontal distances between eyes
            roi_w = int(5 / 3 * eye_distance)
            roi_h = int(2 / 3 * eye_distance)
        else:
            # For larger vertical distances between eyes
            roi_w = int(2 / 3 * eye_distance)
            roi_h = int(5 / 3 * eye_distance)

        # Calculating the center point between the eyes
        center_x = (x1 + x2) // 2
        center_y = (y1 + y2) // 2

        # Adjusting ROI coordinates to keep the center point between the eyes (It essentially grabs the top left
        # coordinate of the roi box)
        roi_x = int(center_x - roi_w / 2)
        roi_y = int(center_y - roi_h / 2)

        # Ensuring the ROI is within image boundaries
        roi_x = max(0, roi_x)
        roi_y = max(0, roi_y)
        roi_w = min(roi_w, img.shape[1] - roi_x)
        roi_h = min(roi_h, img.shape[0] - roi_y)

        crop = img[roi_y:roi_y + roi_h, roi_x:roi_x + roi_w]

        # Preprocessing the cropped face image as required by your model
        crop_resized = cv2.resize(crop, (224, 224))  # Assuming your model expects 224x224 input
        crop_resized = crop_resized.astype('float32') / 255.0  # Normalize if required
        crop_resized = np.expand_dims(crop_resized, axis=0)  # Add batch dimension

        prediction = model.predict(crop_resized)
        label = 'Awake' if prediction[0][0] < 0.5 else 'Sleepy'

        if label == 'Sleepy':
            sleepy_faces += 1
            # Drawing bounding box around drowsy face
            cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
            # Putting text label above the bounding box
            cv2.putText(img, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

    # Displaying the count of sleepy faces detected
    cv2.putText(img, f'Sleepy faces: {sleepy_faces}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    return img, sleepy_faces


def process_image(image_path):
    """Load an image from disk, run sleepy-face detection on it, and return
    the annotated image plus a human-readable summary.

    Parameters
    ----------
    image_path : str
        Path to the image file to analyze.

    Returns
    -------
    tuple
        (annotated BGR image, summary string with the sleepy-face count)

    Raises
    ------
    ValueError
        If the file cannot be decoded as an image.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Unable to load image from {image_path}")

    # The detector works on RGB; OpenCV decodes as BGR.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Shrink so the longest side fits within the display budget,
    # preserving the aspect ratio.
    max_display_size = 800  # Maximum width or height for displaying the image
    height, width = img_rgb.shape[:2]
    if max(height, width) > max_display_size:
        if height > width:
            new_height, new_width = max_display_size, int(width * (max_display_size / height))
        else:
            new_width, new_height = max_display_size, int(height * (max_display_size / width))
        img_rgb = cv2.resize(img_rgb, (new_width, new_height))

    # Detect and annotate sleepy faces on the resized RGB image.
    annotated_rgb, sleepy_faces = classify_faces(img_rgb)

    # Back to BGR so OpenCV writes correct colors.
    annotated_bgr = cv2.cvtColor(annotated_rgb, cv2.COLOR_RGB2BGR)

    return annotated_bgr, f'Sleepy faces detected: {sleepy_faces}'


def process_video(video_path):
    """Run sleepy-face detection on every frame of a video.

    Parameters
    ----------
    video_path : str
        Path to the input video file.

    Returns
    -------
    tuple
        (path to the annotated .mp4 output, summary string with the maximum
        sleepy-face count observed in any single frame)
    """
    cap = cv2.VideoCapture(video_path)
    max_sleepy_faces = 0

    # Obtaining frame dimensions and FPS from the video capture
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # BUGFIX: some containers report 0 FPS; fall back to a sane default so
    # the VideoWriter still produces a playable file.
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30

    # Open the writer up front and stream frames through it. The original
    # buffered every annotated frame in a list before writing, which costs
    # O(video size) memory for no benefit.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
    temp_file.close()  # close our handle; cv2.VideoWriter reopens the path
    out = cv2.VideoWriter(temp_file.name, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Detector/model expect RGB; OpenCV decodes frames as BGR.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            frame_with_boxes, sleepy_faces = classify_faces(frame_rgb)

            # Track the worst (highest) per-frame sleepy count.
            max_sleepy_faces = max(max_sleepy_faces, sleepy_faces)

            # Convert back to BGR before writing.
            out.write(cv2.cvtColor(frame_with_boxes, cv2.COLOR_RGB2BGR))
    finally:
        # Release capture and writer even if classification raises mid-video.
        cap.release()
        out.release()

    return temp_file.name, f'The maximum number of sleepy faces detected in the entire video is: {max_sleepy_faces}'


def image_interface(image):
    """Gradio adapter for still images.

    Saves the incoming PIL image to a temporary file, runs detection, writes
    the annotated result to another temporary file, and returns its path
    along with the detection summary text.
    """
    source_tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.jpg')
    image.save(source_tmp.name)

    annotated, detection_info = process_image(source_tmp.name)

    result_tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.jpg')
    cv2.imwrite(result_tmp.name, annotated)

    return result_tmp.name, detection_info


def video_interface(video_path):
    """Gradio adapter for videos: delegate straight to process_video,
    which already returns (output video path, summary text)."""
    return process_video(video_path)

# One Gradio Interface per media type; each returns (media, text) outputs.
image_container = gr.Interface(fn=image_interface, inputs=gr.Image(type="pil"), outputs=[gr.Image(), gr.Text()])
video_container = gr.Interface(fn=video_interface, inputs=gr.Video(), outputs=[gr.Video(), gr.Text()])

# Compose both interfaces into a tabbed app with a title header.
with gr.Blocks() as container:
    gr.Markdown("# Sleep Detection")
    gr.Markdown("### Made by Joy Biswas")
    gr.TabbedInterface([image_container, video_container], ["Image Detection", "Video Detection"])

# Start the Gradio server (blocks until the app is stopped).
container.launch()