Testcomic

Sleeping

File size: 8,833 Bytes

83e35a7

"""
Enhanced eye state detection to avoid half-closed eyes in frames
"""

import cv2
import numpy as np
from typing import Dict, Tuple, List
import os

class EyeStateDetector:
    """Detect eye states (open, closed, half-closed) in images"""
    
    def __init__(self):
        # Load cascade classifiers
        self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        self.eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')
        
        # Eye aspect ratio thresholds
        self.EAR_THRESHOLD_CLOSED = 0.2
        self.EAR_THRESHOLD_HALF = 0.25
        self.EAR_THRESHOLD_OPEN = 0.3
        
    def check_eyes_state(self, image_path: str) -> Dict[str, any]:
        """
        Check the state of eyes in an image
        
        Returns:
            dict: {
                'state': 'open'|'closed'|'half_closed'|'unknown',
                'confidence': float (0-1),
                'suitable_for_comic': bool,
                'eye_aspect_ratio': float
            }
        """
        img = cv2.imread(image_path)
        if img is None:
            return {
                'state': 'unknown',
                'confidence': 0.0,
                'suitable_for_comic': False,
                'eye_aspect_ratio': 0.0
            }
        
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        
        # Detect faces
        faces = self.face_cascade.detectMultiScale(gray, 1.1, 4)
        if len(faces) == 0:
            return {
                'state': 'unknown',
                'confidence': 0.0,
                'suitable_for_comic': True,  # No face, might be background
                'eye_aspect_ratio': 0.0
            }
        
        # Process the largest face
        x, y, w, h = max(faces, key=lambda f: f[2] * f[3])
        face_roi = gray[y:y+h, x:x+w]
        
        # Detect eyes in face region
        eyes = self.eye_cascade.detectMultiScale(face_roi, 1.05, 5)
        
        if len(eyes) < 2:
            # Less than 2 eyes detected - might be closed or profile view
            return {
                'state': 'possibly_closed',
                'confidence': 0.5,
                'suitable_for_comic': False,
                'eye_aspect_ratio': 0.0
            }
        
        # Calculate eye metrics
        eye_metrics = self._analyze_eye_openness(face_roi, eyes)
        
        # Determine state
        state, confidence, suitable = self._determine_eye_state(eye_metrics)
        
        return {
            'state': state,
            'confidence': confidence,
            'suitable_for_comic': suitable,
            'eye_aspect_ratio': eye_metrics['average_ear']
        }
    
    def _analyze_eye_openness(self, face_roi, eyes) -> Dict[str, float]:
        """Analyze how open the eyes are"""
        eye_aspects = []
        
        for (ex, ey, ew, eh) in eyes[:2]:  # Process first two eyes
            eye_roi = face_roi[ey:ey+eh, ex:ex+ew]
            
            # Calculate eye aspect ratio (simplified)
            # In a real implementation, we'd use facial landmarks
            # Here we use a simpler approach based on eye region intensity
            
            # Check vertical gradient (open eyes have more gradient)
            gradient = cv2.Sobel(eye_roi, cv2.CV_64F, 0, 1, ksize=3)
            gradient_magnitude = np.abs(gradient).mean()
            
            # Check darkness ratio (closed eyes are darker)
            mean_intensity = eye_roi.mean()
            
            # Estimate eye aspect ratio
            ear = self._estimate_ear(gradient_magnitude, mean_intensity, eh)
            eye_aspects.append(ear)
        
        return {
            'average_ear': np.mean(eye_aspects) if eye_aspects else 0.0,
            'min_ear': min(eye_aspects) if eye_aspects else 0.0,
            'max_ear': max(eye_aspects) if eye_aspects else 0.0
        }
    
    def _estimate_ear(self, gradient, intensity, height) -> float:
        """Estimate eye aspect ratio from simple features"""
        # Normalize features
        gradient_score = min(gradient / 50.0, 1.0)
        intensity_score = min(intensity / 150.0, 1.0)
        height_score = min(height / 30.0, 1.0)
        
        # Combine scores (higher = more open)
        ear = (gradient_score * 0.5 + intensity_score * 0.3 + height_score * 0.2)
        return ear
    
    def _determine_eye_state(self, metrics: Dict[str, float]) -> Tuple[str, float, bool]:
        """Determine eye state from metrics"""
        ear = metrics['average_ear']
        
        if ear < self.EAR_THRESHOLD_CLOSED:
            return 'closed', 0.8, False
        elif ear < self.EAR_THRESHOLD_HALF:
            return 'half_closed', 0.7, False
        elif ear < self.EAR_THRESHOLD_OPEN:
            return 'partially_open', 0.6, True  # Acceptable but not ideal
        else:
            return 'open', 0.9, True
    
    def select_best_frame(self, frame_paths: List[str], target_emotion: str = None) -> str:
        """
        Select the best frame from a list, avoiding half-closed eyes
        
        Args:
            frame_paths: List of frame file paths
            target_emotion: Optional emotion to match
            
        Returns:
            Path to the best frame
        """
        frame_scores = []
        
        for frame_path in frame_paths:
            eye_state = self.check_eyes_state(frame_path)
            
            # Calculate score
            score = 0.0
            
            # Eye state scoring
            if eye_state['state'] == 'open':
                score += 1.0
            elif eye_state['state'] == 'partially_open':
                score += 0.7
            elif eye_state['state'] == 'half_closed':
                score += 0.2
            else:
                score += 0.1
            
            # Confidence bonus
            score += eye_state['confidence'] * 0.3
            
            # Suitability check
            if not eye_state['suitable_for_comic']:
                score *= 0.5  # Penalize unsuitable frames
            
            frame_scores.append((frame_path, score, eye_state))
        
        # Sort by score and return best
        frame_scores.sort(key=lambda x: x[1], reverse=True)
        
        if frame_scores:
            best_frame, best_score, best_state = frame_scores[0]
            print(f"  👁️ Selected frame with {best_state['state']} eyes (score: {best_score:.2f})")
            return best_frame
        
        return frame_paths[0] if frame_paths else None


def enhance_frame_selection(video_path: str, subtitle, output_dir: str, frames_to_extract: int = 5):
    """
    Extract multiple frames and select the best one (no half-closed eyes)
    
    Args:
        video_path: Path to video file
        subtitle: Subtitle object with start/end times
        output_dir: Directory to save the selected frame
        frames_to_extract: Number of candidate frames to extract
        
    Returns:
        Path to the selected frame
    """
    import tempfile
    
    detector = EyeStateDetector()
    
    # Create temp directory for candidate frames
    temp_dir = tempfile.mkdtemp()
    
    try:
        cap = cv2.VideoCapture(video_path)
        fps = cap.get(cv2.CAP_PROP_FPS)
        
        # Calculate time range
        start_time = subtitle.start.total_seconds()
        end_time = subtitle.end.total_seconds()
        duration = end_time - start_time
        
        # Extract multiple frames across the subtitle duration
        candidate_frames = []
        
        for i in range(frames_to_extract):
            # Distribute frames evenly across the duration
            time_offset = (i + 1) / (frames_to_extract + 1) * duration
            timestamp = start_time + time_offset
            frame_num = int(timestamp * fps)
            
            # Extract frame
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
            ret, frame = cap.read()
            
            if ret:
                temp_path = os.path.join(temp_dir, f"candidate_{i}.png")
                cv2.imwrite(temp_path, frame)
                candidate_frames.append(temp_path)
        
        cap.release()
        
        # Select best frame
        if candidate_frames:
            best_frame_path = detector.select_best_frame(candidate_frames)
            
            # Copy best frame to output
            if best_frame_path:
                output_path = os.path.join(output_dir, f"frame_{subtitle.index:03d}.png")
                img = cv2.imread(best_frame_path)
                cv2.imwrite(output_path, img)
                return output_path
        
    finally:
        # Clean up temp files
        import shutil
        shutil.rmtree(temp_dir, ignore_errors=True)
    
    return None