# gradio-sleep_detection / sleep_detection.py
# (originally uploaded to Hugging Face Spaces by Cosmos48, commit 4802794)
import cv2
import numpy as np
from mtcnn import MTCNN
from tensorflow.keras.models import load_model
import gradio as gr
import tempfile
import os
base_dir = os.getcwd()
saved_model_dir = os.path.join(base_dir, 'saved_model')
# Loading the trained CNN model
model = load_model(saved_model_dir)
# Initializing the MTCNN face detector
detector = MTCNN()
# Making a function for fetching roi coordinates, performing classification and displaying image having detection
def classify_faces(img):
    """Detect faces in an RGB image, classify each as 'Awake' or 'Sleepy'
    from an eye-region crop, and annotate the image in place.

    Parameters
    ----------
    img : np.ndarray
        RGB image of shape (H, W, 3). Modified in place with boxes/labels.

    Returns
    -------
    tuple[np.ndarray, int]
        The annotated image and the number of faces classified as sleepy.
    """
    faces = detector.detect_faces(img)
    sleepy_faces = 0
    for face in faces:
        x, y, w, h = face['box']
        x1, y1 = face['keypoints']['left_eye']
        x2, y2 = face['keypoints']['right_eye']
        # The Euclidean distance between the eyes sets the ROI scale
        eye_distance = np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
        if abs(x2 - x1) > abs(y2 - y1):
            # Mostly-horizontal eye line: wide, short ROI
            roi_w = int(5 / 3 * eye_distance)
            roi_h = int(2 / 3 * eye_distance)
        else:
            # Mostly-vertical eye line (strongly tilted head): narrow, tall ROI
            roi_w = int(2 / 3 * eye_distance)
            roi_h = int(5 / 3 * eye_distance)
        # Center the ROI on the midpoint between the eyes (top-left corner
        # of the box), then clamp it inside the image boundaries
        center_x = (x1 + x2) // 2
        center_y = (y1 + y2) // 2
        roi_x = max(0, int(center_x - roi_w / 2))
        roi_y = max(0, int(center_y - roi_h / 2))
        roi_w = min(roi_w, img.shape[1] - roi_x)
        roi_h = min(roi_h, img.shape[0] - roi_y)
        # Skip degenerate ROIs (e.g. keypoints at the image edge):
        # cv2.resize raises on an empty crop
        if roi_w <= 0 or roi_h <= 0:
            continue
        crop = img[roi_y:roi_y + roi_h, roi_x:roi_x + roi_w]
        if crop.size == 0:
            continue
        # Preprocess to the model's expected input: 224x224, [0, 1], batched
        crop_resized = cv2.resize(crop, (224, 224))
        crop_resized = crop_resized.astype('float32') / 255.0
        crop_resized = np.expand_dims(crop_resized, axis=0)
        prediction = model.predict(crop_resized)
        # Single sigmoid output: < 0.5 -> awake, >= 0.5 -> sleepy
        label = 'Awake' if prediction[0][0] < 0.5 else 'Sleepy'
        if label == 'Sleepy':
            sleepy_faces += 1
        # Draw the face bounding box and its label (drawn for every face,
        # regardless of label)
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
        cv2.putText(img, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
    # Overlay the total sleepy-face count in the top-left corner
    cv2.putText(img, f'Sleepy faces: {sleepy_faces}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    return img, sleepy_faces
def process_image(image_path):
    """Load an image from disk, run sleep detection, and return the result.

    Parameters
    ----------
    image_path : str
        Path to the input image file.

    Returns
    -------
    tuple[np.ndarray, str]
        Annotated image in BGR order (ready for cv2.imwrite) and a
        summary string with the sleepy-face count.

    Raises
    ------
    ValueError
        If the file cannot be decoded as an image.
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        raise ValueError(f"Unable to load image from {image_path}")
    # OpenCV loads BGR; the detector and model work on RGB
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    # Downscale so the longest side fits the display budget, keeping aspect ratio
    max_display_size = 800
    height, width, _ = rgb.shape
    if max(height, width) > max_display_size:
        if height > width:
            new_height = max_display_size
            new_width = int(width * (max_display_size / height))
        else:
            new_width = max_display_size
            new_height = int(height * (max_display_size / width))
        rgb = cv2.resize(rgb, (new_width, new_height))
    # Annotate faces and count the sleepy ones
    annotated, sleepy_faces = classify_faces(rgb)
    # Convert back to BGR so downstream cv2.imwrite stores correct colors
    return cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR), f'Sleepy faces detected: {sleepy_faces}'
def process_video(video_path):
    """Run sleep detection on every frame of a video.

    Frames are annotated and streamed directly to the output writer, so
    memory usage stays constant regardless of video length (the original
    buffered every frame in a list first).

    Parameters
    ----------
    video_path : str
        Path to the input video file.

    Returns
    -------
    tuple[str, str]
        Path to the annotated .mp4 (a persistent temporary file) and a
        summary string with the per-frame maximum sleepy-face count.
    """
    cap = cv2.VideoCapture(video_path)
    max_sleepy_faces = 0
    # Frame geometry and rate come from the input capture
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    if fps <= 0:
        # Some containers report no FPS; fall back to a sane default so
        # VideoWriter does not receive a broken frame rate
        fps = 30
    # Close the temp file handle before handing the path to VideoWriter
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
    temp_file.close()
    out = cv2.VideoWriter(temp_file.name, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # The detector/model expect RGB; OpenCV delivers BGR
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_with_boxes, sleepy_faces = classify_faces(frame_rgb)
            max_sleepy_faces = max(max_sleepy_faces, sleepy_faces)
            # Convert back to BGR and write immediately (no buffering)
            out.write(cv2.cvtColor(frame_with_boxes, cv2.COLOR_RGB2BGR))
    finally:
        # Always release capture and writer, even if a frame fails
        cap.release()
        out.release()
    return temp_file.name, f'The maximum number of sleepy faces detected in the entire video is: {max_sleepy_faces}'
def image_interface(image):
    """Gradio handler for single-image detection.

    Persists the uploaded PIL image to a temporary file, runs detection,
    and returns the annotated image path plus the summary text.
    """
    # Gradio hands us a PIL image; process_image wants a file path
    source = tempfile.NamedTemporaryFile(delete=False, suffix='.jpg')
    image.save(source.name)
    annotated, detection_info = process_image(source.name)
    # Write the annotated BGR image out for Gradio to display
    sink = tempfile.NamedTemporaryFile(delete=False, suffix='.jpg')
    cv2.imwrite(sink.name, annotated)
    return sink.name, detection_info
def video_interface(video_path):
    """Gradio handler for video detection: thin pass-through to process_video,
    returning (annotated video path, summary text)."""
    return process_video(video_path)
image_container = gr.Interface(fn=image_interface, inputs=gr.Image(type="pil"), outputs=[gr.Image(), gr.Text()])
video_container = gr.Interface(fn=video_interface, inputs=gr.Video(), outputs=[gr.Video(), gr.Text()])
with gr.Blocks() as container:
gr.Markdown("# Sleep Detection")
gr.Markdown("### Made by Joy Biswas")
gr.TabbedInterface([image_container, video_container], ["Image Detection", "Video Detection"])
container.launch()