# Provenance: uploaded to Hugging Face by user "Zynaly" (commit 3efff6d, "Upload 3 files").
import cv2
import numpy as np
import mediapipe as mp
from aura.gaze_tracking import GazeTracking
import time
import threading
class ImageEnhancer:
    """Frame pre-processing pipeline: CLAHE contrast boost, gamma
    correction, and a light Canny-edge overlay."""

    @staticmethod
    def enhance_image(frame):
        """Return an enhanced copy of *frame* (BGR uint8 image).

        The input frame is not modified; a new image is returned.
        """
        # Step 1 — contrast: equalize only the lightness channel in LAB
        # space so colors are preserved.
        lightness, chan_a, chan_b = cv2.split(
            cv2.cvtColor(frame, cv2.COLOR_BGR2LAB))
        clahe = cv2.createCLAHE(clipLimit=4, tileGridSize=(8, 8))
        contrasted = cv2.cvtColor(
            cv2.merge((clahe.apply(lightness), chan_a, chan_b)),
            cv2.COLOR_LAB2BGR)
        # Step 2 — gamma correction (gamma = 1.8 darkens mid-tones),
        # applied through a precomputed 256-entry lookup table.
        gamma = 1.8
        lut = np.array(
            [((i / 255.0) ** gamma) * 255 for i in range(256)]).astype("uint8")
        gamma_corrected = cv2.LUT(contrasted, lut)
        # Step 3 — blend Canny edges in at 20% weight to emphasize
        # facial contours for the downstream trackers.
        gray = cv2.cvtColor(gamma_corrected, cv2.COLOR_BGR2GRAY)
        edges_bgr = cv2.cvtColor(cv2.Canny(gray, 250, 500),
                                 cv2.COLOR_GRAY2BGR)
        return cv2.addWeighted(gamma_corrected, 0.8, edges_bgr, 0.2, 0)
class KalmanFilter:
    """Thin wrapper around cv2.KalmanFilter configured as a
    constant-velocity model for smoothing 2-D pupil coordinates.

    State is (x, y, vx, vy); measurements are (x, y).
    """

    def __init__(self):
        kf = cv2.KalmanFilter(4, 2)
        # Only the position components are observed.
        kf.measurementMatrix = np.array([[1, 0, 0, 0],
                                         [0, 1, 0, 0]], np.float32)
        # Constant-velocity transition: position += velocity each step.
        kf.transitionMatrix = np.array([[1, 0, 1, 0],
                                        [0, 1, 0, 1],
                                        [0, 0, 1, 0],
                                        [0, 0, 0, 1]], np.float32)
        kf.processNoiseCov = np.eye(4, dtype=np.float32) * 0.5
        self.kalman = kf

    def correct(self, coord):
        """Fold a measured (x, y) pair into the filter state."""
        measurement = np.array([coord[0], coord[1]], np.float32)
        return self.kalman.correct(measurement)

    def predict(self):
        """Return the filter's predicted next state."""
        return self.kalman.predict()
class GazeProcessor:
    """Estimates gaze direction and head pose from a shared webcam stream
    and accumulates focus / distraction counters across frames.

    NOTE(review): the original file was pasted without indentation; the
    nesting below is a reconstruction of the most plausible structure —
    confirm against the upstream repository.
    """

    def __init__(self, webcam):
        # Capture device shared with other consumers; reads are guarded
        # by webcam_lock.
        self.webcam = webcam
        self.webcam_lock = threading.Lock()
        # Two independent trackers: one fed the mirrored frame, one the
        # original, so left/right cues can be cross-checked.
        self.gaze_flip = GazeTracking()
        self.gaze_orig = GazeTracking()
        self.kalman_filter = KalmanFilter()
        # Running counters, updated per detected face in process_combined().
        self.total_distractions = 0
        self.focused = 0
        self.right_distractions = 0
        self.left_distractions = 0

    def _get_gaze_info(self, gaze):
        """Map a GazeTracking state to a (label, BGR color) pair.

        Labels: "Yes1" = looking right, "Yes" = looking left,
        "No" = centered, "" = gaze state undetermined.
        """
        if gaze.is_right():
            return "Yes1", (0, 0, 255)
        elif gaze.is_left():
            return "Yes", (0, 0, 255)
        elif gaze.is_center():
            return "No", (0, 255, 0)
        return "", (255, 0, 0)

    def _get_eye_position(self, gaze):
        """Return the Kalman-predicted pupil position, or None when
        either pupil is not detected.

        NOTE(review): not called anywhere in this file — possibly dead
        code or used by an external caller; verify before removing.
        """
        left_pupil = gaze.pupil_left_coords()
        right_pupil = gaze.pupil_right_coords()
        if left_pupil and right_pupil:
            # Average the two pupils, then smooth through the filter.
            avg_pupil = np.mean([left_pupil, right_pupil], axis=0)
            corrected_pupil = self.kalman_filter.correct(avg_pupil)
            predicted_pupil = self.kalman_filter.predict()
            return predicted_pupil
        return None

    def process_combined(self):
        """Generator: read frames, estimate gaze and head pose, update
        the distraction counters, and yield each annotated frame encoded
        as JPEG bytes (suitable for an MJPEG HTTP stream).
        """
        mp_face_mesh = mp.solutions.face_mesh
        face_mesh = mp_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5)
        mp_drawing = mp.solutions.drawing_utils
        drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
        while self.webcam.isOpened():
            start_time = time.time()  # NOTE(review): captured but never used
            # Hold the lock only for the read so other consumers can
            # interleave captures.
            with self.webcam_lock:
                ret, frame = self.webcam.read()
            if not ret:
                break
            enhanced_frame = ImageEnhancer.enhance_image(frame)
            flipped_frame = cv2.flip(enhanced_frame, 1)
            # Refresh both trackers: mirrored view and original view.
            self.gaze_flip.refresh(flipped_frame)
            self.gaze_orig.refresh(enhanced_frame)
            new_frame_orig = self.gaze_orig.annotated_frame()
            text_flip, _ = self._get_gaze_info(self.gaze_flip)
            text_orig, _ = self._get_gaze_info(self.gaze_orig)
            text_pose = ''
            # Head-pose estimation: MediaPipe face mesh + solvePnP.
            image_rgb = cv2.cvtColor(enhanced_frame, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(image_rgb)
            img_h, img_w, img_c = enhanced_frame.shape
            face_3d = []
            face_2d = []
            if results.multi_face_landmarks:
                for face_landmarks in results.multi_face_landmarks:
                    for idx, lm in enumerate(face_landmarks.landmark):
                        # Six landmarks used for PnP; index 1 is the nose
                        # tip (its 2-D/3-D coords are stored separately
                        # but unused afterwards).
                        if idx in [33, 263, 1, 61, 291, 199]:
                            if idx == 1:
                                nose_2d = (lm.x * img_w, lm.y * img_h)
                                nose_3d = (lm.x * img_w, lm.y * img_h, lm.z * 3000)
                            x, y = int(lm.x * img_w), int(lm.y * img_h)
                            face_2d.append([x, y])
                            face_3d.append([x, y, lm.z])
                    face_2d = np.array(face_2d, dtype=np.float64)
                    face_3d = np.array(face_3d, dtype=np.float64)
                    # Approximate pinhole camera: focal length ~ image
                    # width, principal point at the image center.
                    focal_length = 1 * img_w
                    cam_matrix = np.array([[focal_length, 0, img_w / 2],
                                           [0, focal_length, img_h / 2],
                                           [0, 0, 1]])
                    # Assume no lens distortion.
                    dist_matrix = np.zeros((4, 1), dtype=np.float64)
                    success, rot_vec, trans_vec = cv2.solvePnP(face_3d, face_2d, cam_matrix, dist_matrix)
                    rmat, jac = cv2.Rodrigues(rot_vec)
                    angles, mtxR, mtxQ, Qx, Qy, Qz = cv2.RQDecomp3x3(rmat)
                    # Scaled decomposition angles; +/-13 is the empirical
                    # "looking away" threshold used below. z is unused.
                    x = angles[0] * 360
                    y = angles[1] * 360
                    z = angles[2] * 360
                    if y < -13:
                        text_pose = "Looking Right"
                    elif y > 13:
                        text_pose = "Looking Left"
                    elif x < -13:
                        text_pose = "Looking Down"
                    elif x > 13:
                        text_pose = "Looking Up"
                    else:
                        text_pose = "Forward"
                    # Count a distraction when either head pose or either
                    # gaze tracker reports off-center.
                    if text_pose != "Forward" or text_flip == "Yes1" or text_orig == "Yes1" or text_flip == 'Yes' or text_orig == 'Yes':
                        self.total_distractions += 1
                    if text_pose == "Forward" and text_orig == "No" and text_flip == "No":
                        self.focused += 1
                    # Mirrored-frame "right" ("Yes1") is tallied as a left
                    # distraction and vice versa — presumably because the
                    # flip reverses handedness; confirm intent.
                    if text_flip == "Yes1" or text_orig == "Yes" or text_pose == "Looking Left":
                        self.left_distractions += 1
                    if text_flip == "Yes" or text_orig == "Yes1" or text_pose == "Looking Right":
                        self.right_distractions += 1
                    cv2.putText(new_frame_orig, text_pose, (10, 140), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                    mp_drawing.draw_landmarks(
                        image=new_frame_orig,
                        landmark_list=face_landmarks,
                        connections=mp_face_mesh.FACEMESH_TESSELATION,
                        landmark_drawing_spec=drawing_spec,
                        connection_drawing_spec=drawing_spec)
            # Overlay the running counters and stream the frame as JPEG.
            # NOTE(review): the y offsets (height-450 / height-400) go
            # negative for frames shorter than 450 px — confirm expected
            # camera resolution.
            frame_height, frame_width = new_frame_orig.shape[:2]
            cv2.putText(new_frame_orig, f"Distractions: {self.total_distractions}", (10, frame_height - 450),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(new_frame_orig, f"Focused: {self.focused}", (10, frame_height - 400),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            _, buffer = cv2.imencode('.jpg', new_frame_orig)
            frame = buffer.tobytes()
            yield frame

    def get_focus_and_distractions(self):
        """Return a snapshot of the accumulated counters as a dict."""
        return {
            'distractions': self.total_distractions,
            'focus': self.focused,
            'left_distractions': self.left_distractions,
            'right_distractions': self.right_distractions
        }