cpu

Sleeping

App Files Files Community

cpu / backend /eye_state_detector.py

3v324v23

Update Comic123 with local comic folder files

83e35a7 5 months ago

raw

history blame contribute delete

8.83 kB

	"""
	Enhanced eye state detection to avoid half-closed eyes in frames
	"""

	import cv2
	import numpy as np
	from typing import Dict, Tuple, List
	import os

	class EyeStateDetector:
	"""Detect eye states (open, closed, half-closed) in images"""

	def __init__(self):
	# Load cascade classifiers
	self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
	self.eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')

	# Eye aspect ratio thresholds
	self.EAR_THRESHOLD_CLOSED = 0.2
	self.EAR_THRESHOLD_HALF = 0.25
	self.EAR_THRESHOLD_OPEN = 0.3

	def check_eyes_state(self, image_path: str) -> Dict[str, any]:
	"""
	Check the state of eyes in an image

	Returns:
	dict: {
	'state': 'open'\|'closed'\|'half_closed'\|'unknown',
	'confidence': float (0-1),
	'suitable_for_comic': bool,
	'eye_aspect_ratio': float
	}
	"""
	img = cv2.imread(image_path)
	if img is None:
	return {
	'state': 'unknown',
	'confidence': 0.0,
	'suitable_for_comic': False,
	'eye_aspect_ratio': 0.0
	}

	gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

	# Detect faces
	faces = self.face_cascade.detectMultiScale(gray, 1.1, 4)
	if len(faces) == 0:
	return {
	'state': 'unknown',
	'confidence': 0.0,
	'suitable_for_comic': True, # No face, might be background
	'eye_aspect_ratio': 0.0
	}

	# Process the largest face
	x, y, w, h = max(faces, key=lambda f: f[2] * f[3])
	face_roi = gray[y:y+h, x:x+w]

	# Detect eyes in face region
	eyes = self.eye_cascade.detectMultiScale(face_roi, 1.05, 5)

	if len(eyes) < 2:
	# Less than 2 eyes detected - might be closed or profile view
	return {
	'state': 'possibly_closed',
	'confidence': 0.5,
	'suitable_for_comic': False,
	'eye_aspect_ratio': 0.0
	}

	# Calculate eye metrics
	eye_metrics = self._analyze_eye_openness(face_roi, eyes)

	# Determine state
	state, confidence, suitable = self._determine_eye_state(eye_metrics)

	return {
	'state': state,
	'confidence': confidence,
	'suitable_for_comic': suitable,
	'eye_aspect_ratio': eye_metrics['average_ear']
	}

	def _analyze_eye_openness(self, face_roi, eyes) -> Dict[str, float]:
	"""Analyze how open the eyes are"""
	eye_aspects = []

	for (ex, ey, ew, eh) in eyes[:2]: # Process first two eyes
	eye_roi = face_roi[ey:ey+eh, ex:ex+ew]

	# Calculate eye aspect ratio (simplified)
	# In a real implementation, we'd use facial landmarks
	# Here we use a simpler approach based on eye region intensity

	# Check vertical gradient (open eyes have more gradient)
	gradient = cv2.Sobel(eye_roi, cv2.CV_64F, 0, 1, ksize=3)
	gradient_magnitude = np.abs(gradient).mean()

	# Check darkness ratio (closed eyes are darker)
	mean_intensity = eye_roi.mean()

	# Estimate eye aspect ratio
	ear = self._estimate_ear(gradient_magnitude, mean_intensity, eh)
	eye_aspects.append(ear)

	return {
	'average_ear': np.mean(eye_aspects) if eye_aspects else 0.0,
	'min_ear': min(eye_aspects) if eye_aspects else 0.0,
	'max_ear': max(eye_aspects) if eye_aspects else 0.0
	}

	def _estimate_ear(self, gradient, intensity, height) -> float:
	"""Estimate eye aspect ratio from simple features"""
	# Normalize features
	gradient_score = min(gradient / 50.0, 1.0)
	intensity_score = min(intensity / 150.0, 1.0)
	height_score = min(height / 30.0, 1.0)

	# Combine scores (higher = more open)
	ear = (gradient_score * 0.5 + intensity_score * 0.3 + height_score * 0.2)
	return ear

	def _determine_eye_state(self, metrics: Dict[str, float]) -> Tuple[str, float, bool]:
	"""Determine eye state from metrics"""
	ear = metrics['average_ear']

	if ear < self.EAR_THRESHOLD_CLOSED:
	return 'closed', 0.8, False
	elif ear < self.EAR_THRESHOLD_HALF:
	return 'half_closed', 0.7, False
	elif ear < self.EAR_THRESHOLD_OPEN:
	return 'partially_open', 0.6, True # Acceptable but not ideal
	else:
	return 'open', 0.9, True

	def select_best_frame(self, frame_paths: List[str], target_emotion: str = None) -> str:
	"""
	Select the best frame from a list, avoiding half-closed eyes

	Args:
	frame_paths: List of frame file paths
	target_emotion: Optional emotion to match

	Returns:
	Path to the best frame
	"""
	frame_scores = []

	for frame_path in frame_paths:
	eye_state = self.check_eyes_state(frame_path)

	# Calculate score
	score = 0.0

	# Eye state scoring
	if eye_state['state'] == 'open':
	score += 1.0
	elif eye_state['state'] == 'partially_open':
	score += 0.7
	elif eye_state['state'] == 'half_closed':
	score += 0.2
	else:
	score += 0.1

	# Confidence bonus
	score += eye_state['confidence'] * 0.3

	# Suitability check
	if not eye_state['suitable_for_comic']:
	score *= 0.5 # Penalize unsuitable frames

	frame_scores.append((frame_path, score, eye_state))

	# Sort by score and return best
	frame_scores.sort(key=lambda x: x[1], reverse=True)

	if frame_scores:
	best_frame, best_score, best_state = frame_scores[0]
	print(f" 👁️ Selected frame with {best_state['state']} eyes (score: {best_score:.2f})")
	return best_frame

	return frame_paths[0] if frame_paths else None


	def enhance_frame_selection(video_path: str, subtitle, output_dir: str, frames_to_extract: int = 5):
	"""
	Extract multiple frames and select the best one (no half-closed eyes)

	Args:
	video_path: Path to video file
	subtitle: Subtitle object with start/end times
	output_dir: Directory to save the selected frame
	frames_to_extract: Number of candidate frames to extract

	Returns:
	Path to the selected frame
	"""
	import tempfile

	detector = EyeStateDetector()

	# Create temp directory for candidate frames
	temp_dir = tempfile.mkdtemp()

	try:
	cap = cv2.VideoCapture(video_path)
	fps = cap.get(cv2.CAP_PROP_FPS)

	# Calculate time range
	start_time = subtitle.start.total_seconds()
	end_time = subtitle.end.total_seconds()
	duration = end_time - start_time

	# Extract multiple frames across the subtitle duration
	candidate_frames = []

	for i in range(frames_to_extract):
	# Distribute frames evenly across the duration
	time_offset = (i + 1) / (frames_to_extract + 1) * duration
	timestamp = start_time + time_offset
	frame_num = int(timestamp * fps)

	# Extract frame
	cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
	ret, frame = cap.read()

	if ret:
	temp_path = os.path.join(temp_dir, f"candidate_{i}.png")
	cv2.imwrite(temp_path, frame)
	candidate_frames.append(temp_path)

	cap.release()

	# Select best frame
	if candidate_frames:
	best_frame_path = detector.select_best_frame(candidate_frames)

	# Copy best frame to output
	if best_frame_path:
	output_path = os.path.join(output_dir, f"frame_{subtitle.index:03d}.png")
	img = cv2.imread(best_frame_path)
	cv2.imwrite(output_path, img)
	return output_path

	finally:
	# Clean up temp files
	import shutil
	shutil.rmtree(temp_dir, ignore_errors=True)

	return None