| """ | |
| Vision Engine: Advanced Face Analysis with InspireFace-Equivalent Features | |
| Implements features similar to InspireFace SDK using MediaPipe: | |
| - Face Detection & Tracking | |
| - 106-Point Landmark Detection | |
| - Head Pose Estimation (Yaw, Pitch, Roll) | |
| - Face Emotion (7 classes) | |
| - Silent Liveness (anti-spoofing) | |
| - Cooperative Liveness (blink verification) | |
| - Face Quality Score | |
| - Mask Detection | |
| - Face Attributes (Age/Gender estimation) | |
| - Blink Rate & Attention Score | |
| """ | |
import cv2
import mediapipe as mp
import numpy as np
from typing import Dict, Optional, Tuple, List
from collections import deque
import time


class VisionEngine:
    """
    Production-grade face analysis engine with InspireFace-equivalent features
    """

    def __init__(self, process_width=640):
        # Initialize MediaPipe Face Mesh with refined landmarks (478 points)
        self.mp_face_mesh = mp.solutions.face_mesh
        self.face_mesh = self.mp_face_mesh.FaceMesh(
            max_num_faces=1,
            refine_landmarks=True,  # 478 landmarks including iris
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        )

        # Dedicated mesh for meetings (initialized lazily)
        self.meeting_mesh = None
        self.single_mesh = self.face_mesh
        self.is_meeting_mode = False

        # Drawing utilities
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_drawing_styles = mp.solutions.drawing_styles

        # Performance settings
        self.process_width = process_width
        self.frame_count = 0
        self.last_landmarks = None

        # Webcam
        self.cap = None
        self.ear_threshold = 0.35

        # ===== TRACKING BUFFERS =====
        self.head_positions = deque(maxlen=30)
        self.ear_history = deque(maxlen=10)
        self.emotion_history = deque(maxlen=15)
        self.quality_history = deque(maxlen=20)
        # Blink Detection
        self.blink_count = 0
        self.blink_timestamps = deque(maxlen=60)  # Store blink times
        self.last_blink_state = False
        self.blink_edge = False  # True only on the frame a blink starts
        self.blinks_per_minute = 0
        self.session_start_time = time.time()
        # Cooperative Liveness (blink verification)
        self.coop_liveness_blinks = 0
        self.coop_liveness_start = None
        self.coop_liveness_verified = False

        # Anti-spoofing
        self.texture_scores = deque(maxlen=30)
        self.color_variance_history = deque(maxlen=20)
        self.prev_frame_gray = None
        self.motion_scores = deque(maxlen=20)

        # Face Quality tracking
        self.face_sizes = deque(maxlen=10)

        # 3D Face Model Points for head pose
        self.model_points = np.array([
            (0.0, 0.0, 0.0),           # Nose tip
            (0.0, -330.0, -65.0),      # Chin
            (-225.0, 170.0, -135.0),   # Left eye corner
            (225.0, 170.0, -135.0),    # Right eye corner
            (-150.0, -150.0, -125.0),  # Left mouth corner
            (150.0, -150.0, -125.0)    # Right mouth corner
        ], dtype=np.float64)

    def start_camera(self, camera_id: int = 0) -> bool:
        """Initialize webcam"""
        if self.cap is not None and self.cap.isOpened():
            return True
        self.cap = cv2.VideoCapture(camera_id)
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
        self.cap.set(cv2.CAP_PROP_FPS, 30)
        # Reset tracking
        self._reset_tracking()
        return self.cap.isOpened()

    def _reset_tracking(self):
        """Reset all tracking buffers"""
        self.blink_count = 0
        self.blink_timestamps.clear()
        self.last_blink_state = False
        self.blink_edge = False
        self.session_start_time = time.time()
        self.head_positions.clear()
        self.ear_history.clear()
        self.texture_scores.clear()
        self.coop_liveness_blinks = 0
        self.coop_liveness_start = None
        self.coop_liveness_verified = False
        self.prev_frame_gray = None

    def stop_camera(self):
        """Release webcam"""
        if self.cap:
            self.cap.release()
            self.cap = None

    def get_frame(self, resize=True) -> Optional[np.ndarray]:
        """Capture a single frame"""
        if not self.cap or not self.cap.isOpened():
            return None
        ret, frame = self.cap.read()
        if not ret:
            return None
        if resize and frame.shape[1] > self.process_width:
            height = int(frame.shape[0] * (self.process_width / frame.shape[1]))
            frame = cv2.resize(frame, (self.process_width, height))
        return frame

    def analyze_frame(self, frame: np.ndarray, skip_frames=2) -> Dict:
        """
        Comprehensive frame analysis with InspireFace-equivalent features
        """
        self.frame_count += 1
        h, w = frame.shape[:2]

        # Skip frames for performance: reuse the last result on skipped frames
        if self.frame_count % skip_frames != 0 and self.last_landmarks is not None:
            return self.last_landmarks

        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        results = self.face_mesh.process(rgb_frame)
        if not results.multi_face_landmarks:
            return self._no_face_result()
        face_landmarks = results.multi_face_landmarks[0]

        # ===== CORE METRICS =====
        # 1. Eye Aspect Ratio (EAR)
        ear_left = self._calculate_ear(face_landmarks, [33, 160, 158, 133, 153, 144])
        ear_right = self._calculate_ear(face_landmarks, [362, 385, 387, 263, 373, 380])
        avg_ear = (ear_left + ear_right) / 2.0
        self.ear_history.append(avg_ear)
        smoothed_ear = sum(self.ear_history) / len(self.ear_history)

        # 2. Mouth Aspect Ratio (MAR)
        mar = self._calculate_mar(face_landmarks)

        # 3. Head Pose (Yaw, Pitch, Roll)
        yaw, pitch, roll = self._estimate_head_pose(face_landmarks, w, h)

        # ===== INSPIREFACE-EQUIVALENT FEATURES =====
        # 4. Face Quality Score
        face_quality = self._calculate_face_quality(face_landmarks, gray_frame, w, h, yaw, pitch)

        # 5. Mask Detection
        is_wearing_mask, mask_confidence = self._detect_mask(face_landmarks, mar)

        # 6. Face Emotion (7 classes)
        emotion_score, emotion_label, emotion_probs = self._detect_emotion(face_landmarks, mar, smoothed_ear)
        self.emotion_history.append(emotion_score)
        smoothed_emotion = sum(self.emotion_history) / len(self.emotion_history)

        # 7. Silent Liveness (Anti-Spoofing)
        silent_liveness_score = self._calculate_silent_liveness(frame, gray_frame, face_landmarks, w, h)

        # 8. Blink Detection & Rate
        blink_rate = self._update_blink_detection(smoothed_ear)

        # 9. Cooperative Liveness (blink verification)
        coop_liveness_status = self._update_cooperative_liveness(smoothed_ear)

        # 10. Gaze Score
        gaze_score = self._calculate_gaze_score(face_landmarks, yaw, pitch)

        # 11. Head Stability
        head_stability = self._calculate_head_stability(face_landmarks)

        # 12. Attention Score
        attention_score = self._calculate_attention_score(gaze_score, head_stability, smoothed_ear, emotion_label)

        # 13. Face Attributes (approximate age/gender)
        face_attributes = self._estimate_face_attributes(face_landmarks)

        # ===== DETECTION LOGIC =====
        ear_threshold = getattr(self, 'ear_threshold', 0.35)
        is_drowsy = smoothed_ear < ear_threshold
        is_yawning = mar > 0.50

        # Combined liveness status
        liveness_status = self._determine_liveness_status(
            silent_liveness_score, coop_liveness_status, blink_rate, head_stability
        )

        # Store previous frame for motion detection
        self.prev_frame_gray = gray_frame.copy()

        result = {
            # Core metrics
            'gaze_score': gaze_score,
            'emotion_score': smoothed_emotion,
            'head_stability': head_stability,
            'face_detected': True,
            'eye_openness': smoothed_ear,
            'mouth_openness': mar,
            'is_yawning': is_yawning,
            'is_drowsy': is_drowsy,
            'ear_threshold': ear_threshold,
            # Head pose
            'head_pose': (yaw, pitch, roll),
            'yaw': yaw,
            'pitch': pitch,
            'roll': roll,
            # InspireFace-equivalent
            'face_quality': face_quality,
            'is_wearing_mask': is_wearing_mask,
            'mask_confidence': mask_confidence,
            'emotion_label': emotion_label,
            'emotion_probs': emotion_probs,
            'silent_liveness_score': silent_liveness_score,
            'coop_liveness_status': coop_liveness_status,
            'coop_liveness_verified': self.coop_liveness_verified,
            'blink_rate': blink_rate,
            'blink_count': self.blink_count,
            'attention_score': attention_score,
            'face_attributes': face_attributes,
            # Legacy compatibility
            'liveness_status': liveness_status,
            'anti_spoof_score': silent_liveness_score,
        }
        self.last_landmarks = result
        return result

    def _no_face_result(self) -> Dict:
        """Return empty result when no face detected"""
        return {
            'gaze_score': 0.0, 'emotion_score': 0.0, 'head_stability': 0.0,
            'face_detected': False, 'eye_openness': 0.0, 'mouth_openness': 0.0,
            'is_yawning': False, 'is_drowsy': False, 'liveness_status': "No Face",
            'attention_score': 0.0, 'blink_rate': 0, 'head_pose': (0, 0, 0),
            'emotion_label': 'Unknown', 'anti_spoof_score': 0.0,
            'face_quality': 0.0, 'is_wearing_mask': False, 'mask_confidence': 0.0,
            'silent_liveness_score': 0.0, 'coop_liveness_status': 'Waiting',
            'emotion_probs': {}, 'face_attributes': {}, 'yaw': 0, 'pitch': 0, 'roll': 0,
            'blink_count': 0, 'coop_liveness_verified': False, 'ear_threshold': 0.35
        }

    def _calculate_ear(self, landmarks, indices) -> float:
        """Calculate Eye Aspect Ratio"""
        p2 = np.array([landmarks.landmark[indices[1]].x, landmarks.landmark[indices[1]].y])
        p6 = np.array([landmarks.landmark[indices[5]].x, landmarks.landmark[indices[5]].y])
        p3 = np.array([landmarks.landmark[indices[2]].x, landmarks.landmark[indices[2]].y])
        p5 = np.array([landmarks.landmark[indices[4]].x, landmarks.landmark[indices[4]].y])
        p1 = np.array([landmarks.landmark[indices[0]].x, landmarks.landmark[indices[0]].y])
        p4 = np.array([landmarks.landmark[indices[3]].x, landmarks.landmark[indices[3]].y])
        dist_v1 = np.linalg.norm(p2 - p6)
        dist_v2 = np.linalg.norm(p3 - p5)
        dist_h = np.linalg.norm(p1 - p4)
        if dist_h == 0: return 0.0
        return (dist_v1 + dist_v2) / (2.0 * dist_h)

    def _calculate_mar(self, landmarks) -> float:
        """Calculate Mouth Aspect Ratio (lip gap height / mouth width)"""
        p_top = np.array([landmarks.landmark[13].x, landmarks.landmark[13].y])
        p_bot = np.array([landmarks.landmark[14].x, landmarks.landmark[14].y])
        p_left = np.array([landmarks.landmark[61].x, landmarks.landmark[61].y])
        p_right = np.array([landmarks.landmark[291].x, landmarks.landmark[291].y])
        height = np.linalg.norm(p_top - p_bot)
        width = np.linalg.norm(p_left - p_right)
        if width == 0: return 0.0
        return height / width

    def _estimate_head_pose(self, landmarks, w, h) -> Tuple[float, float, float]:
        """Estimate head pose using solvePnP"""
        # 2D image points in the same order as self.model_points
        image_points = np.array([
            (landmarks.landmark[1].x * w, landmarks.landmark[1].y * h),      # Nose tip
            (landmarks.landmark[152].x * w, landmarks.landmark[152].y * h),  # Chin
            (landmarks.landmark[33].x * w, landmarks.landmark[33].y * h),    # Left eye corner
            (landmarks.landmark[263].x * w, landmarks.landmark[263].y * h),  # Right eye corner
            (landmarks.landmark[61].x * w, landmarks.landmark[61].y * h),    # Left mouth corner
            (landmarks.landmark[291].x * w, landmarks.landmark[291].y * h)   # Right mouth corner
        ], dtype=np.float64)
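        # NOTE (assumption): approximate pinhole intrinsics - focal length
        # taken as the image width and the principal point at the image
        # center, with zero lens distortion. This is a common shortcut for
        # webcams; metrically accurate angles would need a calibrated
        # camera (e.g. via cv2.calibrateCamera).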
        focal_length = w
        center = (w / 2, h / 2)
        camera_matrix = np.array([
            [focal_length, 0, center[0]],
            [0, focal_length, center[1]],
            [0, 0, 1]
        ], dtype=np.float64)
        dist_coeffs = np.zeros((4, 1))

        success, rotation_vector, _ = cv2.solvePnP(
            self.model_points, image_points, camera_matrix, dist_coeffs
        )
        if not success:
            return (0, 0, 0)
        rotation_matrix, _ = cv2.Rodrigues(rotation_vector)

        # Extract Euler angles from the rotation matrix
        sy = np.sqrt(rotation_matrix[0, 0] ** 2 + rotation_matrix[1, 0] ** 2)
        singular = sy < 1e-6
        if not singular:
            pitch = np.arctan2(rotation_matrix[2, 1], rotation_matrix[2, 2])
            yaw = np.arctan2(-rotation_matrix[2, 0], sy)
            roll = np.arctan2(rotation_matrix[1, 0], rotation_matrix[0, 0])
        else:
            pitch = np.arctan2(-rotation_matrix[1, 2], rotation_matrix[1, 1])
            yaw = np.arctan2(-rotation_matrix[2, 0], sy)
            roll = 0

        # Convert to degrees
        pitch = np.degrees(pitch)
        yaw = np.degrees(yaw)
        roll = np.degrees(roll)

        # Normalize angles to the -180..180 range
        if pitch > 180: pitch -= 360
        if yaw > 180: yaw -= 360
        if roll > 180: roll -= 360

        # Pitch correction: with OpenCV's coordinate convention a face
        # looking straight ahead often comes out near +/-180 degrees. Fold
        # the angle so that forward = 0, looking up = positive, looking
        # down = negative.
        if abs(pitch) > 90:
            if pitch > 0: pitch = 180 - pitch
            else: pitch = -180 - pitch

        return (yaw, pitch, roll)

    def _calculate_face_quality(self, landmarks, gray, w, h, yaw, pitch) -> float:
        """
        Calculate face quality score (InspireFace equivalent)
        Factors: sharpness, pose, size, brightness, symmetry
        """
        # 1. Sharpness (Laplacian variance)
        laplacian_var = cv2.Laplacian(gray, cv2.CV_64F).var()
        sharpness_score = min(1.0, laplacian_var / 300)

        # 2. Pose quality (face should be frontal)
        yaw_score = max(0, 1 - abs(yaw) / 45)
        pitch_score = max(0, 1 - abs(pitch) / 45)
        pose_score = (yaw_score + pitch_score) / 2

        # 3. Face size (should be reasonably large)
        face_points = [(landmarks.landmark[i].x * w, landmarks.landmark[i].y * h)
                       for i in [10, 152, 234, 454]]  # Top, bottom, left, right
        face_width = abs(face_points[2][0] - face_points[3][0])
        face_height = abs(face_points[0][1] - face_points[1][1])
        face_area = face_width * face_height
        size_score = min(1.0, face_area / (w * h * 0.15))  # Face should cover at least ~15% of the frame
        self.face_sizes.append(size_score)

        # 4. Brightness (not too dark or too bright)
        mean_brightness = np.mean(gray)
        brightness_score = 1.0 - abs(mean_brightness - 127) / 127

        # 5. Symmetry check
        left_eye = landmarks.landmark[33]
        right_eye = landmarks.landmark[263]
        nose = landmarks.landmark[1]
        left_dist = abs(left_eye.x - nose.x)
        right_dist = abs(right_eye.x - nose.x)
        symmetry_score = 1.0 - min(1.0, abs(left_dist - right_dist) * 5)

        # Weighted combination
        quality = (
            sharpness_score * 0.25 +
            pose_score * 0.25 +
            size_score * 0.20 +
            brightness_score * 0.15 +
            symmetry_score * 0.15
        )
        self.quality_history.append(quality)
        return sum(self.quality_history) / len(self.quality_history)

    def _detect_mask(self, landmarks, mar) -> Tuple[bool, float]:
        """
        Detect if the person is wearing a mask
        Conservative detection - only triggers when clearly wearing a mask
        """
        # Key points for mask detection
        nose_tip = landmarks.landmark[1]
        mouth_top = landmarks.landmark[13]
        mouth_bottom = landmarks.landmark[14]
        left_cheek = landmarks.landmark[234]
        right_cheek = landmarks.landmark[454]

        # Mouth aspect ratio - masks make the mouth nearly invisible.
        # Normal MAR is 0.1-0.5; a masked face has MAR near zero.
        mouth_hidden = mar < 0.08  # Very strict - mouth basically invisible

        # Nose-to-mouth distance relative to face width: masks compress the
        # lower-face landmarks, so they bunch together unnaturally.
        face_width = abs(right_cheek.x - left_cheek.x)
        nose_mouth_dist = abs(nose_tip.y - mouth_top.y)
        compression = nose_mouth_dist / max(face_width, 0.001)
        heavily_compressed = compression < 0.08  # Very strict

        # Landmark confidence check - masks often cause unstable mouth landmarks
        mouth_height = abs(mouth_bottom.y - mouth_top.y)
        mouth_too_flat = mouth_height < 0.005  # Basically a line

        # Only mark as masked if multiple strong indicators agree
        mask_score = 0.0
        if mouth_hidden:
            mask_score += 0.4
        if heavily_compressed:
            mask_score += 0.3
        if mouth_too_flat:
            mask_score += 0.3

        # Require very high confidence to declare a mask
        is_wearing_mask = mask_score > 0.7
        return is_wearing_mask, mask_score

    def _detect_emotion(self, landmarks, mar, ear) -> Tuple[float, str, Dict]:
        """
        Detect facial emotion (InspireFace's 7 classes plus a 'Focused' state)
        Classes: Neutral, Happy, Sad, Angry, Fearful, Disgusted, Surprised
        """
        # Mouth shape analysis
        left_mouth = landmarks.landmark[61]
        right_mouth = landmarks.landmark[291]
        mouth_top = landmarks.landmark[13]
        mouth_bottom = landmarks.landmark[14]
        mouth_width = abs(right_mouth.x - left_mouth.x)

        # Eyebrow analysis
        left_brow_inner = landmarks.landmark[55]
        right_brow_inner = landmarks.landmark[285]
        left_eye_center = landmarks.landmark[159]
        right_eye_center = landmarks.landmark[386]
        left_brow_raise = left_eye_center.y - left_brow_inner.y
        right_brow_raise = right_eye_center.y - right_brow_inner.y
        avg_brow_raise = (left_brow_raise + right_brow_raise) / 2

        # Mouth corners relative to center (positive = corners pulled up)
        mouth_center_y = (mouth_top.y + mouth_bottom.y) / 2
        corner_pull = mouth_center_y - (left_mouth.y + right_mouth.y) / 2

        # Initialize probabilities - default to Neutral/Focused
        probs = {
            'Focused': 0.5,  # Default when looking attentive
            'Neutral': 0.4,
            'Happy': 0.0,
            'Sad': 0.0,
            'Angry': 0.0,
            'Fearful': 0.0,
            'Disgusted': 0.0,
            'Surprised': 0.0
        }

        # Happy - corners clearly up, wide mouth
        if corner_pull > 0.015:
            probs['Happy'] = min(1.0, 0.5 + corner_pull * 12)
            probs['Focused'] = 0.2
            probs['Neutral'] = 0.1

        # Surprised - clearly raised brows, open mouth
        if avg_brow_raise > 0.05 and mar > 0.35:
            probs['Surprised'] = min(1.0, 0.5 + avg_brow_raise * 6 + mar)
            probs['Focused'] = 0.2

        # Sad - corners clearly down (strict threshold to limit false positives)
        if corner_pull < -0.025:
            probs['Sad'] = min(1.0, 0.3 + abs(corner_pull) * 10)
            probs['Focused'] = 0.3

        # Angry - clearly lowered brows, tight mouth
        if avg_brow_raise < 0.01 and mouth_width < 0.08:
            probs['Angry'] = 0.4
            probs['Focused'] = 0.3

        # Focused state - eyes open, looking at screen, neutral expression
        if ear > 0.25 and abs(corner_pull) < 0.015:
            probs['Focused'] = max(probs['Focused'], 0.6)

        # Pick the top-scoring class
        emotion_label = max(probs, key=probs.get)
        emotion_score = probs[emotion_label]
        return emotion_score, emotion_label, probs

    def _calculate_silent_liveness(self, frame, gray, landmarks, w, h) -> float:
        """
        Silent Liveness Detection (Anti-Spoofing)
        Multi-factor analysis without requiring user interaction
        """
        scores = []

        # 1. Texture analysis (real faces have more micro-texture)
        face_bbox = self._get_face_bbox(landmarks, w, h)
        if face_bbox:
            x1, y1, x2, y2 = face_bbox
            face_region = gray[y1:y2, x1:x2]
            if face_region.size > 100:
                laplacian_var = cv2.Laplacian(face_region, cv2.CV_64F).var()
                texture_score = min(1.0, laplacian_var / 400)
                self.texture_scores.append(texture_score)
                scores.append(sum(self.texture_scores) / len(self.texture_scores))

        # 2. Color distribution (real faces have natural color variation)
        if face_bbox:
            x1, y1, x2, y2 = face_bbox
            face_color = frame[y1:y2, x1:x2]
            if face_color.size > 100:
                hsv = cv2.cvtColor(face_color, cv2.COLOR_BGR2HSV)
                h_std = np.std(hsv[:, :, 0])
                s_std = np.std(hsv[:, :, 1])
                color_var = (h_std + s_std) / 2
                color_score = min(1.0, color_var / 30)
                self.color_variance_history.append(color_score)
                scores.append(sum(self.color_variance_history) / len(self.color_variance_history))

        # 3. Motion analysis (photos don't have natural micro-movements)
        if self.prev_frame_gray is not None and face_bbox:
            x1, y1, x2, y2 = face_bbox
            prev_face = self.prev_frame_gray[y1:y2, x1:x2]
            curr_face = gray[y1:y2, x1:x2]
            if prev_face.shape == curr_face.shape and prev_face.size > 100:
                diff = cv2.absdiff(prev_face, curr_face)
                motion = np.mean(diff)
                # Real faces: small but non-zero motion
                motion_score = 1.0 if 1.0 < motion < 15.0 else 0.5
                self.motion_scores.append(motion_score)
                scores.append(sum(self.motion_scores) / len(self.motion_scores))

        # 4. Blink detection (photos don't blink)
        blink_score = 1.0 if self.blink_count > 0 else 0.3
        scores.append(blink_score)

        # 5. Head movement (photos are static)
        if len(self.head_positions) > 10:
            positions = np.array(list(self.head_positions))
            position_var = np.var(positions, axis=0).sum()
            movement_score = 1.0 if 0.00005 < position_var < 0.01 else 0.3
            scores.append(movement_score)

        if scores:
            return sum(scores) / len(scores)
        return 0.5

    def _get_face_bbox(self, landmarks, w, h, padding=10) -> Optional[Tuple[int, int, int, int]]:
        """Get face bounding box from landmarks"""
        # Use the 468 base mesh points (the refined iris points are excluded)
        x_coords = [int(landmarks.landmark[i].x * w) for i in range(468)]
        y_coords = [int(landmarks.landmark[i].y * h) for i in range(468)]
        x1 = max(0, min(x_coords) - padding)
        y1 = max(0, min(y_coords) - padding)
        x2 = min(w, max(x_coords) + padding)
        y2 = min(h, max(y_coords) + padding)
        if x2 > x1 and y2 > y1:
            return (x1, y1, x2, y2)
        return None

    def _update_blink_detection(self, ear) -> int:
        """Track blinks and calculate blinks per minute"""
        is_blink = ear < 0.22
        # Record the rising edge before updating last_blink_state so that
        # _update_cooperative_liveness (called later in the same frame) can
        # still see it.
        self.blink_edge = is_blink and not self.last_blink_state
        if self.blink_edge:
            self.blink_count += 1
            self.blink_timestamps.append(time.time())
        self.last_blink_state = is_blink

        # Calculate BPM from recent blinks
        now = time.time()
        recent_blinks = [t for t in self.blink_timestamps if now - t < 60]
        self.blinks_per_minute = len(recent_blinks)
        return self.blinks_per_minute
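
    # NOTE (added context): a relaxed adult blink rate is commonly cited at
    # roughly 10-20 blinks per minute; markedly lower rates often accompany
    # screen-staring and markedly higher ones fatigue. The loose
    # "blink_rate > 5" check in _determine_liveness_status is a deliberately
    # forgiving lower bound on that band.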

    def _update_cooperative_liveness(self, ear) -> str:
        """
        Cooperative Liveness: Verify user is live by asking for blinks
        """
        if self.coop_liveness_verified:
            return "Verified ✓"
        if self.coop_liveness_start is None:
            self.coop_liveness_start = time.time()
            self.coop_liveness_blinks = 0

        # Track blinks in the verification window. Use the rising edge
        # recorded by _update_blink_detection: re-testing ear against
        # last_blink_state here would miss every blink, because that flag
        # has already been updated for the current frame.
        if self.blink_edge:
            self.coop_liveness_blinks += 1

        # Check if verified (2+ blinks in 10 seconds)
        elapsed = time.time() - self.coop_liveness_start
        if self.coop_liveness_blinks >= 2:
            self.coop_liveness_verified = True
            return "Verified ✓"
        elif elapsed < 10:
            return f"Blink {self.coop_liveness_blinks}/2"
        else:
            # Window expired: reset and try again
            self.coop_liveness_start = time.time()
            self.coop_liveness_blinks = 0
            return "Blink 0/2"

    def _calculate_gaze_score(self, landmarks, yaw, pitch) -> float:
        """
        Calculate gaze/attention score
        Lenient by design - focuses on whether the person is facing the camera
        """
        # Normalize angles to the -180..180 range
        yaw = yaw % 360
        if yaw > 180: yaw -= 360
        pitch = pitch % 360
        if pitch > 180: pitch -= 360

        # Lenient thresholds - 45 degrees of tolerance
        yaw_score = max(0, 1 - abs(yaw) / 45)
        pitch_score = max(0, 1 - abs(pitch) / 45)

        # Eye position in frame (should be roughly centered)
        left_eye = landmarks.landmark[33]
        right_eye = landmarks.landmark[263]
        eye_center_x = (left_eye.x + right_eye.x) / 2
        eye_center_y = (left_eye.y + right_eye.y) / 2

        # Position score - lenient, face should be in frame
        x_deviation = abs(eye_center_x - 0.5)
        y_deviation = abs(eye_center_y - 0.4)  # Eyes typically sit ~40% from the top
        position_score = max(0, 1 - (x_deviation + y_deviation) * 1.5)

        # Weighted average - position matters most (face in frame = looking)
        gaze = yaw_score * 0.3 + pitch_score * 0.2 + position_score * 0.5

        # Boost if the face is well-centered
        if x_deviation < 0.15 and y_deviation < 0.2:
            gaze = min(1.0, gaze + 0.15)
        return min(1.0, max(0.0, gaze))

    def _calculate_head_stability(self, landmarks) -> float:
        """Calculate head stability from movement history"""
        nose = landmarks.landmark[1]
        current_pos = (nose.x, nose.y, getattr(nose, 'z', 0))
        self.head_positions.append(current_pos)
        if len(self.head_positions) < 5:
            return 0.9
        positions = np.array(list(self.head_positions))
        variance = np.var(positions, axis=0).sum()
        stability = 1.0 - min(1.0, variance * 100)
        return max(0.0, min(1.0, stability))

    def _calculate_attention_score(self, gaze, stability, ear, emotion) -> float:
        """Composite attention score"""
        eye_score = min(1.0, ear / 0.3) if ear > 0.2 else ear / 0.2
        # Emotion bonus (focused/neutral = more attention)
        emotion_bonus = 0.1 if emotion in ['Focused', 'Neutral', 'Happy'] else 0
        attention = gaze * 0.4 + stability * 0.3 + eye_score * 0.3 + emotion_bonus
        return min(1.0, max(0.0, attention))
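
    # Worked example (added for illustration): gaze 0.8, stability 0.9,
    # ear 0.3, emotion 'Focused' gives eye_score = min(1, 0.3/0.3) = 1.0 and
    # attention = 0.8*0.4 + 0.9*0.3 + 1.0*0.3 + 0.1 = 0.99.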

    def _estimate_face_attributes(self, landmarks) -> Dict:
        """
        Estimate face attributes (simplified)
        Note: real age/gender estimation requires deep learning models
        """
        # Face proportion analysis for approximate attributes
        left_eye = landmarks.landmark[33]
        right_eye = landmarks.landmark[263]
        chin = landmarks.landmark[152]

        # Eye-to-chin ratio (approximation for age category)
        eye_chin_dist = abs(chin.y - (left_eye.y + right_eye.y) / 2)
        eye_distance = abs(right_eye.x - left_eye.x)

        # Very rough heuristic (not accurate, just a demo)
        face_ratio = eye_chin_dist / max(eye_distance, 0.001)
        if face_ratio < 1.2:
            age_group = "Child"
        elif face_ratio < 1.4:
            age_group = "Young Adult"
        elif face_ratio < 1.6:
            age_group = "Adult"
        else:
            age_group = "Senior"
        return {
            'age_group': age_group,
            'face_ratio': round(face_ratio, 2),
            'note': 'Approximate (requires ML model for accuracy)'
        }

    def _determine_liveness_status(self, silent_score, coop_status, blink_rate, stability) -> str:
        """Combined liveness determination"""
        if self.coop_liveness_verified and silent_score > 0.6:
            return "Live ✓"
        elif silent_score > 0.7 and blink_rate > 5:
            return "Live"
        elif silent_score > 0.5:
            return "Checking..."
        else:
            return "Suspicious"

    def draw_landmarks(self, frame: np.ndarray, draw: bool = True) -> np.ndarray:
        """Draw a corner-style bounding box and label instead of the full mesh"""
        if not draw: return frame
        # Only draw when the last analysis saw a face; the frame is then
        # re-processed here to get an up-to-date bounding box (cheaper than
        # drawing the full mesh, but still one extra inference per call).
        if self.last_landmarks and self.last_landmarks.get('face_detected'):
            h, w = frame.shape[:2]
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = self.face_mesh.process(rgb_frame)
            if results.multi_face_landmarks:
                face_landmarks = results.multi_face_landmarks[0]
                # Calculate bounding box
                x_min, y_min = w, h
                x_max, y_max = 0, 0
                for lm in face_landmarks.landmark:
                    x, y = int(lm.x * w), int(lm.y * h)
                    if x < x_min: x_min = x
                    if x > x_max: x_max = x
                    if y < y_min: y_min = y
                    if y > y_max: y_max = y
                # Add padding
                pad = 20
                x_min = max(0, x_min - pad)
                y_min = max(0, y_min - pad - 20)  # Extra space for label
                x_max = min(w, x_max + pad)
                y_max = min(h, y_max + pad)

                # Draw corner rectangle (professional look)
                color = (0, 255, 0)  # Green
                thickness = 2
                line_len = 30
                # Top-left
                cv2.line(frame, (x_min, y_min), (x_min + line_len, y_min), color, thickness)
                cv2.line(frame, (x_min, y_min), (x_min, y_min + line_len), color, thickness)
                # Top-right
                cv2.line(frame, (x_max, y_min), (x_max - line_len, y_min), color, thickness)
                cv2.line(frame, (x_max, y_min), (x_max, y_min + line_len), color, thickness)
                # Bottom-left
                cv2.line(frame, (x_min, y_max), (x_min + line_len, y_max), color, thickness)
                cv2.line(frame, (x_min, y_max), (x_min, y_max - line_len), color, thickness)
                # Bottom-right
                cv2.line(frame, (x_max, y_max), (x_max - line_len, y_max), color, thickness)
                cv2.line(frame, (x_max, y_max), (x_max, y_max - line_len), color, thickness)

                # Draw label
                label = "Person"
                (w_text, h_text), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
                cv2.rectangle(frame, (x_min, y_min - 25), (x_min + w_text + 10, y_min - 5), color, -1)
                cv2.putText(frame, label, (x_min + 5, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 1)
        return frame

    def set_meeting_mode(self, enabled: bool):
        """Toggle between single-user focus and multi-user meeting mode"""
        if enabled == self.is_meeting_mode: return
        self.is_meeting_mode = enabled
        if enabled:
            if self.meeting_mesh is None:
                self.meeting_mesh = self.mp_face_mesh.FaceMesh(
                    max_num_faces=10,  # Support up to 10 people
                    refine_landmarks=True,
                    min_detection_confidence=0.3,  # Lower confidence for smaller faces in a grid
                    min_tracking_confidence=0.3
                )
            self.face_mesh = self.meeting_mesh
        else:
            self.face_mesh = self.single_mesh

    def analyze_multi_faces(self, frame) -> List[Dict]:
        """
        Analyze multiple faces in a frame (for meetings)
        Returns a list of results for each detected face
        """
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        h, w = frame.shape[:2]
        results = self.face_mesh.process(rgb_frame)
        output = []
        if results.multi_face_landmarks:
            for i, landmarks in enumerate(results.multi_face_landmarks):
                # 1. Calculate bounding box
                x_values = [lm.x for lm in landmarks.landmark]
                y_values = [lm.y for lm in landmarks.landmark]
                bbox = {
                    'x_min': int(min(x_values) * w),
                    'x_max': int(max(x_values) * w),
                    'y_min': int(min(y_values) * h),
                    'y_max': int(max(y_values) * h)
                }

                # 2. Instantaneous analysis (no history smoothing for multi-face MVP)
                # Head pose
                yaw, pitch, roll = self._estimate_head_pose(landmarks, w, h)
                # EAR / eye openness
                left_ear = self._calculate_ear(landmarks, [33, 160, 158, 133, 153, 144])
                right_ear = self._calculate_ear(landmarks, [362, 385, 387, 263, 373, 380])
                avg_ear = (left_ear + right_ear) / 2
                # MAR / mouth openness
                mar = self._calculate_mar(landmarks)
                # Gaze
                gaze_score = self._calculate_gaze_score(landmarks, yaw, pitch)
                # Emotion
                emotion_score, emotion_label, _ = self._detect_emotion(landmarks, mar, avg_ear)
                # Liveness/drowsiness flags
                is_drowsy = avg_ear < 0.25
                is_yawning = mar > 0.6

                # Composite score
                attention = gaze_score
                engagement = (attention * 0.5 + emotion_score * 0.3 + avg_ear * 0.2) * 100
                if is_drowsy: engagement *= 0.5
                if is_yawning: engagement *= 0.6

                result = {
                    'id': i,
                    'bbox': bbox,
                    'engagement_score': min(100, max(0, engagement)),
                    'is_drowsy': is_drowsy,
                    'is_yawning': is_yawning,
                    'emotion': emotion_label,
                    'attention': attention
                }
                output.append(result)
        return output
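

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the engine itself). It assumes a local
# webcam at index 0 and a desktop environment where OpenCV can open a window;
# press 'q' to quit. Intended purely as a smoke test of the API above.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    engine = VisionEngine(process_width=640)
    if not engine.start_camera(0):
        raise SystemExit("Could not open webcam")
    try:
        while True:
            frame = engine.get_frame()
            if frame is None:
                continue
            metrics = engine.analyze_frame(frame)
            annotated = engine.draw_landmarks(frame)
            # Overlay a few headline metrics from the analysis result
            text = (f"attention={metrics['attention_score']:.2f}  "
                    f"emotion={metrics['emotion_label']}  "
                    f"liveness={metrics['liveness_status']}")
            cv2.putText(annotated, text, (10, 25),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1)
            cv2.imshow("VisionEngine", annotated)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        engine.stop_camera()
        cv2.destroyAllWindows()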