|
|
""" |
|
|
Enhanced eye state detection to avoid half-closed eyes in frames |
|
|
""" |
|
|
|
|
|
import os
from typing import Any, Dict, List, Optional, Tuple

import cv2
import numpy as np
|
|
|
|
|
class EyeStateDetector:
    """Detect eye state (open / closed / half-closed) in still images.

    Uses OpenCV's bundled Haar cascades to locate the largest face and its
    eyes, then derives a crude eye-aspect-ratio-like openness score from
    simple image features.  Intended to filter out frames where a subject
    was caught mid-blink.
    """

    # Openness-score thresholds (see _determine_eye_state): scores below
    # CLOSED are treated as closed, below HALF as half-closed, below OPEN
    # as partially open; anything higher counts as fully open.
    EAR_THRESHOLD_CLOSED = 0.2
    EAR_THRESHOLD_HALF = 0.25
    EAR_THRESHOLD_OPEN = 0.3

    # Base score contributed by each eye state when ranking frames in
    # select_best_frame; unlisted states ('closed', 'possibly_closed',
    # 'unknown') fall back to 0.1.
    _STATE_SCORES = {
        'open': 1.0,
        'partially_open': 0.7,
        'half_closed': 0.2,
    }

    def __init__(self):
        # Haar cascade classifiers shipped with the OpenCV distribution.
        self.face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        self.eye_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_eye.xml')

    @staticmethod
    def _result(state: str, confidence: float, suitable: bool,
                ear: float) -> Dict[str, Any]:
        """Build the uniform result dict returned by check_eyes_state."""
        return {
            'state': state,
            'confidence': confidence,
            'suitable_for_comic': suitable,
            'eye_aspect_ratio': ear,
        }

    def check_eyes_state(self, image_path: str) -> Dict[str, Any]:
        """Check the state of eyes in an image.

        Args:
            image_path: Path to the image file to analyze.

        Returns:
            dict: {
                'state': 'open' | 'partially_open' | 'half_closed' |
                         'closed' | 'possibly_closed' | 'unknown',
                'confidence': float (0-1),
                'suitable_for_comic': bool,
                'eye_aspect_ratio': float
            }
        """
        img = cv2.imread(image_path)
        if img is None:
            # Unreadable/missing image: reject it outright.
            return self._result('unknown', 0.0, False, 0.0)

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        faces = self.face_cascade.detectMultiScale(gray, 1.1, 4)
        if len(faces) == 0:
            # No face at all: eye state is moot, so keep the frame usable.
            return self._result('unknown', 0.0, True, 0.0)

        # Analyze only the largest detected face.
        x, y, w, h = max(faces, key=lambda f: f[2] * f[3])
        face_roi = gray[y:y + h, x:x + w]

        eyes = self.eye_cascade.detectMultiScale(face_roi, 1.05, 5)
        if len(eyes) < 2:
            # Fewer than two detected eyes often indicates a blink.
            return self._result('possibly_closed', 0.5, False, 0.0)

        eye_metrics = self._analyze_eye_openness(face_roi, eyes)
        state, confidence, suitable = self._determine_eye_state(eye_metrics)
        return self._result(state, confidence, suitable,
                            eye_metrics['average_ear'])

    def _analyze_eye_openness(self, face_roi, eyes) -> Dict[str, float]:
        """Estimate openness scores for (at most) the first two eyes.

        Args:
            face_roi: Grayscale face region.
            eyes: Sequence of (x, y, w, h) eye rectangles within face_roi.

        Returns:
            dict with 'average_ear', 'min_ear', 'max_ear' (all 0.0 when
            no eyes were scored).
        """
        eye_aspects = []
        for (ex, ey, ew, eh) in eyes[:2]:
            eye_roi = face_roi[ey:ey + eh, ex:ex + ew]

            # Vertical-gradient strength: an open eye shows strong
            # light/dark transitions (sclera vs. iris/lashes); a closed
            # lid is comparatively smooth.
            gradient = cv2.Sobel(eye_roi, cv2.CV_64F, 0, 1, ksize=3)
            gradient_magnitude = np.abs(gradient).mean()

            # Brightness: visible sclera makes an open eye region brighter.
            mean_intensity = eye_roi.mean()

            ear = self._estimate_ear(gradient_magnitude, mean_intensity, eh)
            eye_aspects.append(ear)

        return {
            'average_ear': float(np.mean(eye_aspects)) if eye_aspects else 0.0,
            'min_ear': min(eye_aspects) if eye_aspects else 0.0,
            'max_ear': max(eye_aspects) if eye_aspects else 0.0,
        }

    def _estimate_ear(self, gradient, intensity, height) -> float:
        """Combine simple features into a pseudo eye-aspect-ratio in [0, 1].

        Args:
            gradient: Mean absolute vertical Sobel response of the eye ROI.
            intensity: Mean grayscale intensity of the eye ROI.
            height: Pixel height of the detected eye rectangle.
        """
        # Normalize each feature against an empirical "fully open" value,
        # capping at 1.0.
        gradient_score = min(gradient / 50.0, 1.0)
        intensity_score = min(intensity / 150.0, 1.0)
        height_score = min(height / 30.0, 1.0)

        # Weighted blend; the gradient cue is the most discriminative.
        return gradient_score * 0.5 + intensity_score * 0.3 + height_score * 0.2

    def _determine_eye_state(self, metrics: Dict[str, float]) -> Tuple[str, float, bool]:
        """Map the averaged openness score to (state, confidence, suitable)."""
        ear = metrics['average_ear']

        if ear < self.EAR_THRESHOLD_CLOSED:
            return 'closed', 0.8, False
        elif ear < self.EAR_THRESHOLD_HALF:
            return 'half_closed', 0.7, False
        elif ear < self.EAR_THRESHOLD_OPEN:
            return 'partially_open', 0.6, True
        else:
            return 'open', 0.9, True

    def select_best_frame(self, frame_paths: List[str],
                          target_emotion: Optional[str] = None) -> Optional[str]:
        """Select the best frame from a list, avoiding half-closed eyes.

        Args:
            frame_paths: List of frame file paths.
            target_emotion: Optional emotion to match (currently unused;
                reserved for future emotion-aware selection).

        Returns:
            Path to the best frame, or None if frame_paths is empty.
        """
        frame_scores = []

        for frame_path in frame_paths:
            eye_state = self.check_eyes_state(frame_path)

            # Base score from the detected eye state.
            score = self._STATE_SCORES.get(eye_state['state'], 0.1)

            # Reward confident detections.
            score += eye_state['confidence'] * 0.3

            # Penalize frames flagged as unsuitable for the comic.
            if not eye_state['suitable_for_comic']:
                score *= 0.5

            frame_scores.append((frame_path, score, eye_state))

        # Highest score first; sort is stable so ties keep input order.
        frame_scores.sort(key=lambda item: item[1], reverse=True)

        if frame_scores:
            best_frame, best_score, best_state = frame_scores[0]
            print(f" 👁️ Selected frame with {best_state['state']} eyes (score: {best_score:.2f})")
            return best_frame

        # Empty input: nothing to rank.
        return frame_paths[0] if frame_paths else None
|
|
|
|
|
|
|
|
def enhance_frame_selection(video_path: str, subtitle, output_dir: str, frames_to_extract: int = 5):
    """
    Extract multiple candidate frames and select the best one (no half-closed eyes)

    Args:
        video_path: Path to video file
        subtitle: Subtitle object with start/end times (timedeltas) and an index
        output_dir: Directory to save the selected frame
        frames_to_extract: Number of candidate frames to extract

    Returns:
        Path to the selected frame, or None if no frame could be extracted
    """
    import shutil
    import tempfile

    detector = EyeStateDetector()

    # Candidates are written to a throwaway directory, removed in `finally`.
    temp_dir = tempfile.mkdtemp()

    try:
        cap = cv2.VideoCapture(video_path)
        try:
            fps = cap.get(cv2.CAP_PROP_FPS)
            # Some containers report 0 or NaN fps; fall back to a sane
            # default so frame numbers don't all collapse to frame 0.
            if not (fps and fps > 0):
                fps = 25.0

            start_time = subtitle.start.total_seconds()
            end_time = subtitle.end.total_seconds()
            duration = end_time - start_time

            # Sample frames evenly inside the subtitle interval, skipping
            # the exact endpoints where cuts/transitions are most likely.
            candidate_frames = []
            for i in range(frames_to_extract):
                time_offset = (i + 1) / (frames_to_extract + 1) * duration
                timestamp = start_time + time_offset
                frame_num = int(timestamp * fps)

                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
                ret, frame = cap.read()

                if ret:
                    temp_path = os.path.join(temp_dir, f"candidate_{i}.png")
                    cv2.imwrite(temp_path, frame)
                    candidate_frames.append(temp_path)
        finally:
            # Release the capture even if reading fails part-way through.
            cap.release()

        if candidate_frames:
            best_frame_path = detector.select_best_frame(candidate_frames)

            if best_frame_path:
                output_path = os.path.join(output_dir, f"frame_{subtitle.index:03d}.png")
                # Copy the already-encoded PNG as-is instead of decoding
                # and re-encoding it with imread/imwrite.
                shutil.copy2(best_frame_path, output_path)
                return output_path
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)

    return None