| |
| """ |
| Modern Face Detection for Accurate Bubble Placement |
| Uses state-of-the-art models for better face and lip detection |
| """ |
|
|
| import cv2 |
| import numpy as np |
| import os |
| from typing import Tuple, List, Optional |
|
|
| class ModernFaceDetector: |
| def __init__(self): |
| """Initialize modern face detection models""" |
| |
| |
| try: |
| import mediapipe as mp |
| self.mp_face_mesh = mp.solutions.face_mesh |
| self.face_mesh = self.mp_face_mesh.FaceMesh( |
| static_image_mode=True, |
| max_num_faces=4, |
| refine_landmarks=True, |
| min_detection_confidence=0.5 |
| ) |
| self.use_mediapipe = True |
| print("Using MediaPipe face detection") |
| except ImportError: |
| self.use_mediapipe = False |
| print("MediaPipe not available, using OpenCV") |
| |
| |
| if not self.use_mediapipe: |
| |
| model_path = "backend/speech_bubble/face_detection_yunet_2023mar.onnx" |
| if not os.path.exists(model_path): |
| |
| self._download_face_model() |
| |
| self.face_detector = cv2.FaceDetectorYN_create( |
| model_path, |
| "", |
| (320, 320), |
| 0.9, |
| 0.3, |
| 5000 |
| ) |
| |
| def _download_face_model(self): |
| """Download OpenCV face detection model if not available""" |
| import urllib.request |
| url = "https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx" |
| print(f"Downloading face detection model from {url}") |
| urllib.request.urlretrieve(url, "backend/speech_bubble/face_detection_yunet_2023mar.onnx") |
| |
| def detect_faces_mediapipe(self, image) -> List[Tuple[int, int]]: |
| """Detect faces using MediaPipe (most accurate)""" |
| |
| if isinstance(image, str): |
| img = cv2.imread(image) |
| else: |
| img = image |
| |
| if img is None: |
| return [(-1, -1)] |
| |
| rgb_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) |
| results = self.face_mesh.process(rgb_image) |
| |
| lip_positions = [] |
| if results.multi_face_landmarks: |
| for face_landmarks in results.multi_face_landmarks: |
| |
| |
| upper_lip = face_landmarks.landmark[13] |
| |
| lower_lip = face_landmarks.landmark[14] |
| |
| |
| lip_x = int((upper_lip.x + lower_lip.x) / 2 * image.shape[1]) |
| lip_y = int((upper_lip.y + lower_lip.y) / 2 * image.shape[0]) |
| |
| lip_positions.append((lip_x, lip_y)) |
| |
| return lip_positions if lip_positions else [(-1, -1)] |
| |
| def detect_faces_opencv(self, image) -> List[Tuple[int, int]]: |
| """Detect faces using OpenCV DNN (fallback)""" |
| |
| if isinstance(image, str): |
| img = cv2.imread(image) |
| else: |
| img = image |
| |
| if img is None: |
| return [(-1, -1)] |
| |
| height, width = img.shape[:2] |
| self.face_detector.setInputSize((width, height)) |
| |
| _, faces = self.face_detector.detect(img) |
| lip_positions = [] |
| |
| if faces is not None: |
| for face in faces: |
| |
| x, y, w, h = face[:4].astype(int) |
| |
| |
| lip_x = x + w // 2 |
| lip_y = y + int(h * 0.7) |
| |
| lip_positions.append((lip_x, lip_y)) |
| |
| return lip_positions if lip_positions else [(-1, -1)] |
| |
| def detect_faces(self, image) -> List[Tuple[int, int]]: |
| """Main face detection method""" |
| if self.use_mediapipe: |
| return self.detect_faces_mediapipe(image) |
| else: |
| return self.detect_faces_opencv(image) |
|
|
| def get_modern_lip_positions(video_path: str, frame_paths: List[str]) -> dict: |
| """ |
| Get lip positions using modern face detection |
| Returns: {frame_index: (lip_x, lip_y)} |
| """ |
| detector = ModernFaceDetector() |
| lip_positions = {} |
| |
| for i, frame_path in enumerate(frame_paths, 1): |
| if os.path.exists(frame_path): |
| positions = detector.detect_faces(frame_path) |
| |
| lip_positions[i] = positions[0] if positions else (-1, -1) |
| else: |
| lip_positions[i] = (-1, -1) |
| |
| return lip_positions |
|
|
| if __name__ == "__main__": |
| |
| test_image = "frames/final/frame001.png" |
| if os.path.exists(test_image): |
| detector = ModernFaceDetector() |
| positions = detector.detect_faces(test_image) |
| print(f"Detected lip positions: {positions}") |
| else: |
| print("Test image not found") |