fast

Build error

App Files Files Community

fast / backend /speech_bubble /modern_face_detection.py

3v324v23

Update Comic123 with local comic folder files

83e35a7 7 months ago

raw

history blame contribute delete

5.5 kB

	#!/usr/bin/env python3
	"""
	Modern Face Detection for Accurate Bubble Placement
	Uses state-of-the-art models for better face and lip detection
	"""

	import cv2
	import numpy as np
	import os
	from typing import Tuple, List, Optional

	class ModernFaceDetector:
	    """Detect faces and estimate lip positions for speech-bubble placement.

	    MediaPipe FaceMesh is used when the ``mediapipe`` package can be
	    imported; otherwise the OpenCV YuNet DNN detector is used, and its ONNX
	    model is downloaded first if it is not already on disk.

	    Every ``detect_faces*`` method accepts either an image file path or an
	    already-loaded BGR image array and returns a list of ``(x, y)`` pixel
	    coordinates, one per detected face. ``[(-1, -1)]`` is returned when the
	    image cannot be read or no face is found.
	    """

	    # YuNet model location (relative to the current working directory) and
	    # the URL it is fetched from when missing.
	    MODEL_PATH = "backend/speech_bubble/face_detection_yunet_2023mar.onnx"
	    MODEL_URL = "https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx"

	    # FaceMesh landmark indices for the upper / lower lip centre.
	    UPPER_LIP_INDEX = 13
	    LOWER_LIP_INDEX = 14

	    # OpenCV fallback: lips are assumed to sit 70% of the way down the
	    # detected face box.
	    LIP_HEIGHT_RATIO = 0.7

	    def __init__(self):
	        """Initialize the best available face detection backend."""
	        # Define both backends up front so the attributes always exist,
	        # whichever branch below ends up being taken.
	        self.face_mesh = None
	        self.face_detector = None

	        # Option 1: MediaPipe (Google's modern face detection)
	        try:
	            import mediapipe as mp
	            self.mp_face_mesh = mp.solutions.face_mesh
	            self.face_mesh = self.mp_face_mesh.FaceMesh(
	                static_image_mode=True,
	                max_num_faces=4,
	                refine_landmarks=True,
	                min_detection_confidence=0.5
	            )
	            self.use_mediapipe = True
	            print("Using MediaPipe face detection")
	        except ImportError:
	            self.use_mediapipe = False
	            print("MediaPipe not available, using OpenCV")

	        # Option 2: OpenCV DNN face detector (more modern than dlib)
	        if not self.use_mediapipe:
	            if not os.path.exists(self.MODEL_PATH):
	                # Download if not available
	                self._download_face_model()

	            self.face_detector = cv2.FaceDetectorYN_create(
	                self.MODEL_PATH,
	                "",          # no separate config file
	                (320, 320),  # initial input size; reset per image in detect_faces_opencv
	                0.9,         # score threshold
	                0.3,         # NMS threshold
	                5000         # top_k candidates kept before NMS
	            )

	    def _download_face_model(self):
	        """Download the OpenCV face detection model to ``MODEL_PATH``.

	        The file is fetched to a temporary sibling path and moved into place
	        only after the download completes, so an interrupted download never
	        leaves a truncated model that later passes the existence check.

	        Raises:
	            OSError: if the download or the final rename fails
	                (``urllib.error.URLError`` is an ``OSError`` subclass).
	        """
	        import urllib.request
	        url = self.MODEL_URL
	        print(f"Downloading face detection model from {url}")

	        target_dir = os.path.dirname(self.MODEL_PATH)
	        if target_dir:
	            # urlretrieve does not create missing parent directories.
	            os.makedirs(target_dir, exist_ok=True)

	        partial_path = self.MODEL_PATH + ".part"
	        urllib.request.urlretrieve(url, partial_path)
	        os.replace(partial_path, self.MODEL_PATH)

	    @staticmethod
	    def _load_image(image):
	        """Return the image array for *image*, reading it from disk if it is a path.

	        ``cv2.imread`` returns ``None`` for unreadable files, so the result
	        may be ``None``; a non-string argument is returned unchanged.
	        """
	        if isinstance(image, str):
	            return cv2.imread(image)
	        return image

	    @staticmethod
	    def _lip_center(face_landmarks, width, height) -> Tuple[int, int]:
	        """Return the pixel midpoint between the upper and lower lip landmarks.

	        Landmark ``x`` / ``y`` values are scaled by the image *width* and
	        *height* to obtain pixel coordinates.
	        """
	        upper_lip = face_landmarks.landmark[ModernFaceDetector.UPPER_LIP_INDEX]
	        lower_lip = face_landmarks.landmark[ModernFaceDetector.LOWER_LIP_INDEX]
	        lip_x = int((upper_lip.x + lower_lip.x) / 2 * width)
	        lip_y = int((upper_lip.y + lower_lip.y) / 2 * height)
	        return (lip_x, lip_y)

	    def detect_faces_mediapipe(self, image) -> List[Tuple[int, int]]:
	        """Detect faces using MediaPipe (most accurate).

	        Args:
	            image: path to an image file, or a BGR image array.

	        Returns:
	            One ``(lip_x, lip_y)`` pixel position per detected face, or
	            ``[(-1, -1)]`` if the image is unreadable or has no face.
	        """
	        img = self._load_image(image)
	        if img is None:
	            return [(-1, -1)]

	        # Use the loaded array for the dimensions: ``image`` may be a path
	        # string, which has no ``shape`` attribute.
	        height, width = img.shape[:2]

	        # OpenCV loads BGR; FaceMesh is fed RGB.
	        rgb_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
	        results = self.face_mesh.process(rgb_image)

	        lip_positions = []
	        if results.multi_face_landmarks:
	            lip_positions = [
	                self._lip_center(face_landmarks, width, height)
	                for face_landmarks in results.multi_face_landmarks
	            ]

	        return lip_positions if lip_positions else [(-1, -1)]

	    def detect_faces_opencv(self, image) -> List[Tuple[int, int]]:
	        """Detect faces using OpenCV DNN (fallback).

	        The lip position is estimated from the face bounding box: the
	        horizontal centre, ``LIP_HEIGHT_RATIO`` of the way down.

	        Args:
	            image: path to an image file, or a BGR image array.

	        Returns:
	            One ``(lip_x, lip_y)`` pixel position per detected face, or
	            ``[(-1, -1)]`` if the image is unreadable or has no face.
	        """
	        img = self._load_image(image)
	        if img is None:
	            return [(-1, -1)]

	        height, width = img.shape[:2]
	        # The detector must be told the size of each image it processes.
	        self.face_detector.setInputSize((width, height))

	        _, faces = self.face_detector.detect(img)
	        lip_positions = []

	        if faces is not None:
	            for face in faces:
	                # The first four values of each row are the bounding box.
	                x, y, w, h = face[:4].astype(int)

	                # Cast to built-in int so callers get plain Python numbers
	                # rather than numpy scalars.
	                lip_x = int(x + w // 2)
	                lip_y = int(y + int(h * self.LIP_HEIGHT_RATIO))

	                lip_positions.append((lip_x, lip_y))

	        return lip_positions if lip_positions else [(-1, -1)]

	    def detect_faces(self, image) -> List[Tuple[int, int]]:
	        """Detect lip positions with whichever backend was initialized."""
	        if self.use_mediapipe:
	            return self.detect_faces_mediapipe(image)
	        return self.detect_faces_opencv(image)

	def get_modern_lip_positions(video_path: str, frame_paths: List[str]) -> dict:
	    """
	    Map each frame to the lip position of its first detected face.

	    A single ModernFaceDetector is created and reused for every frame.
	    ``video_path`` is accepted but not used by this function.

	    Returns: {frame_index: (lip_x, lip_y)}, with frame indices starting at 1.
	    Frames whose file does not exist map to (-1, -1).
	    """
	    face_finder = ModernFaceDetector()
	    not_found = (-1, -1)
	    by_frame = {}

	    for frame_number, path in enumerate(frame_paths, start=1):
	        # Missing frame file: record the sentinel and move on.
	        if not os.path.exists(path):
	            by_frame[frame_number] = not_found
	            continue

	        found = face_finder.detect_faces(path)
	        # Keep only the first face the detector reported.
	        by_frame[frame_number] = found[0] if found else not_found

	    return by_frame

	if __name__ == "__main__":
	    # Manual smoke test: run the detector on one known frame, if present.
	    sample_frame = "frames/final/frame001.png"
	    if not os.path.exists(sample_frame):
	        print("Test image not found")
	    else:
	        smoke_detector = ModernFaceDetector()
	        positions = smoke_detector.detect_faces(sample_frame)
	        print(f"Detected lip positions: {positions}")