Spaces:
Running
Running
| """ | |
| Face Detection Module | |
| This module handles detecting faces in images and videos. | |
| It's responsible for finding where faces are in an image/video so we can swap them. | |
| For Non-Technical Developers: | |
| - Uses AI to find faces in photos and videos | |
| - Returns information about each face (location, confidence, landmarks like eyes/nose) | |
| - Handles difficult situations like poor lighting, small faces, or rotated heads | |
| - Caches results to avoid re-processing the same image multiple times | |
| """ | |
| import cv2 | |
| import numpy as np | |
| import threading | |
| import traceback | |
| import insightface | |
| import torch | |
| import onnxruntime as ort | |
| from functools import lru_cache | |
| from src.config import ( | |
| DEVICE, FACE_DETECTION_THRESHOLDS, MIN_FACE_CONFIDENCE, | |
| FACE_POSE_THRESHOLD, MIN_FACE_AREA_RATIO, EMBEDDING_CACHE_SIZE, | |
| ONNX_INTRA_OP_THREADS, DEBUG_MODE, DEFAULT_HEADERS, DOWNLOAD_TIMEOUT | |
| ) | |
| import requests | |
| import io | |
# ==================== INITIALIZATION ====================
# Initialize the face analyzer (the InsightFace 'buffalo_l' model bundle).
# NOTE: this runs at import time. On failure `face_analyser` is set to None
# and the functions below degrade gracefully (return []/None or raise).
try:
    face_analyser = insightface.app.FaceAnalysis(name='buffalo_l')
    face_analyser.prepare(
        ctx_id=0 if DEVICE == 'cuda' else -1,  # GPU if available, CPU otherwise
        det_size=(640, 640)  # Detection resolution
    )
    if DEBUG_MODE:
        print("✓ Face analyzer initialized successfully")
except Exception as e:
    # Always report this, even outside DEBUG_MODE — nothing downstream works
    # without the analyzer.
    print(f"✗ CRITICAL: FaceAnalysis failed: {e}")
    face_analyser = None

# Thread-safe locks (prevent multiple threads from using shared state at once)
face_lock = threading.Lock()       # Guards every call into face_analyser
detection_lock = threading.Lock()  # Guards detection operations
| # ==================== HELPER FUNCTIONS ==================== | |
def set_detection_threshold(thresh: float) -> None:
    """
    Adjust how confident the face detector must be before reporting a face.

    Higher values (e.g. 0.99) keep only very confident detections and may
    miss faces; lower values (e.g. 0.05) keep uncertain detections and may
    introduce false positives.

    Args:
        thresh: Confidence threshold in the range 0.0 to 1.0.
    """
    global face_analyser
    if face_analyser is None:
        return
    try:
        # Different insightface versions expose the detector under different
        # attributes, so probe both known layouts before giving up.
        if hasattr(face_analyser, 'det_model'):
            face_analyser.det_model.det_thresh = thresh
        elif hasattr(face_analyser, 'models') and 'detection' in face_analyser.models:
            face_analyser.models['detection'].det_thresh = thresh
    except Exception as e:
        # Best-effort: failing to tune the threshold should never crash detection.
        if DEBUG_MODE:
            print(f"Warning: Could not set detection threshold: {e}")
def enhance_image_for_detection(bgr_image: np.ndarray) -> np.ndarray:
    """
    Preprocess an image so faces are easier for the detector to find.

    Two enhancements are applied:
      1. CLAHE on the LAB lightness channel — boosts local contrast, which
         helps with faces in poor lighting.
      2. A 3x3 center-weighted sharpening convolution — crisps up edges.

    Args:
        bgr_image: The image to enhance (BGR color format, as used by OpenCV).

    Returns:
        The enhanced BGR image, or the input unchanged when it is None/empty.
    """
    if bgr_image is None or bgr_image.size == 0:
        return bgr_image

    # Work in LAB so only lightness is equalized and colors stay untouched.
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2LAB))
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    lightness = equalizer.apply(lightness)
    contrasted = cv2.cvtColor(cv2.merge([lightness, chan_a, chan_b]), cv2.COLOR_LAB2BGR)

    # Sharpen edges with the standard 5-center kernel.
    sharpen = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    return cv2.filter2D(contrasted, -1, sharpen)
def _pick_largest_face(faces: list) -> object:
    """
    Return the face with the largest bounding-box area.

    We assume the biggest face is the most important/relevant one (the main
    subject of the image).

    Args:
        faces: List of detected face objects, each exposing a ``bbox`` of
               ``[x1, y1, x2, y2]``.

    Returns:
        The largest face object, or None if the list is empty.
    """
    if not faces:
        return None
    # max() is O(n); the previous full sort just to take the first element
    # was O(n log n). Ties resolve to the first occurrence, same as a stable
    # reverse sort.
    return max(
        faces,
        key=lambda face: (face.bbox[2] - face.bbox[0]) * (face.bbox[3] - face.bbox[1]),
    )
def is_face_landmark_valid(face: object, frame_shape: tuple, min_confidence: float) -> bool:
    """
    Validate a detected face's landmarks (eyes, nose, mouth corners).

    A face passes only when all of the following hold:
      - it carries a full 5-point landmark set,
      - every landmark lies inside the frame (with a small pixel margin),
      - its bounding box covers at least MIN_FACE_AREA_RATIO of the frame,
      - its head roll is within FACE_POSE_THRESHOLD degrees,
      - its detection score meets ``min_confidence``.

    Args:
        face: The detected face object (may be None).
        frame_shape: Frame shape as (height, width[, channels]).
        min_confidence: Minimum detection confidence required.

    Returns:
        True when the face looks usable for landmark-based processing.
    """
    if face is None:
        return False

    height, width = frame_shape[:2]
    margin = 10  # pixels of tolerance around the frame edges

    # Require the full 5-point set (both eyes, nose tip, both mouth corners).
    keypoints = getattr(face, 'kps', None)
    if keypoints is None or len(keypoints) < 5:
        return False

    # All landmarks must sit within the frame, allowing the margin.
    if not all(
        -margin <= x <= width + margin and -margin <= y <= height + margin
        for x, y in keypoints
    ):
        return False

    # The face box must cover a minimum share of the frame.
    x1, y1, x2, y2 = face.bbox
    if (x2 - x1) * (y2 - y1) < MIN_FACE_AREA_RATIO * (width * height):
        return False

    # Reject heads rolled beyond the configured threshold.
    pose = getattr(face, 'pose', None)  # (yaw, pitch, roll) angles
    if pose is not None and len(pose) >= 3 and abs(pose[2]) > FACE_POSE_THRESHOLD:
        return False

    # Finally, the detector itself must be confident enough.
    return getattr(face, 'det_score', 1.0) >= min_confidence
| # ==================== FACE DETECTION FUNCTIONS ==================== | |
def detect_faces_in_image(bgr_image: np.ndarray, threshold: float = MIN_FACE_CONFIDENCE) -> list:
    """
    Detect all faces in an image.

    If the first pass finds nothing, a second pass runs on a
    contrast-enhanced copy of the image (see enhance_image_for_detection).

    Args:
        bgr_image: Image to search (BGR format from OpenCV).
        threshold: Required detector confidence (0.0-1.0).

    Returns:
        List of detected face objects; empty list when no face is found,
        the analyzer is unavailable, the input is unusable, or an error occurs.
    """
    global face_analyser
    # Bail out early when the model failed to load or the input is empty.
    if face_analyser is None:
        return []
    if bgr_image is None or bgr_image.size == 0:
        return []
    try:
        # Serialize all analyzer access — the model is shared across threads.
        with face_lock:
            set_detection_threshold(threshold)
            detected = face_analyser.get(bgr_image)
            if detected:
                return detected
            # Retry once on an enhanced image; helps in poor lighting.
            return face_analyser.get(enhance_image_for_detection(bgr_image))
    except Exception as e:
        if DEBUG_MODE:
            print(f"Error during face detection: {e}")
            print(traceback.format_exc())
        return []
def get_best_face_in_image(bgr_image: np.ndarray, threshold: float = MIN_FACE_CONFIDENCE) -> object:
    """
    Find the most prominent face in an image.

    "Prominent" means the largest face by bounding-box area — typically the
    main subject of the photo.

    Args:
        bgr_image: The image to search.
        threshold: Detection confidence threshold (0.0-1.0).

    Returns:
        The largest detected face object, or None when no face is found.
    """
    return _pick_largest_face(detect_faces_in_image(bgr_image, threshold))
def detect_faces_with_multiscale(bgr_image: np.ndarray) -> object:
    """
    Detect a face by trying progressively lower confidence thresholds.

    Hard-to-detect faces (shadowed, rotated, small) may only surface at a
    low threshold, so each value in FACE_DETECTION_THRESHOLDS is tried in
    order until one yields a face.

    Args:
        bgr_image: The image to search.

    Returns:
        The largest face found at the first successful threshold, or None.
    """
    for threshold in FACE_DETECTION_THRESHOLDS:
        face = get_best_face_in_image(bgr_image, threshold)
        # Compare against None explicitly: insightface Face objects are
        # dict-like, so a falsy-but-valid face would be wrongly skipped by
        # a bare truthiness test.
        if face is not None:
            return face
    return None
@lru_cache(maxsize=EMBEDDING_CACHE_SIZE)
def get_face_embedding_from_url(url: str) -> object:
    """
    Download an image from a URL and extract the best face (with embedding).

    A face embedding is like a fingerprint of a face — it uniquely identifies
    the person's face. We use it to know which face to swap FROM.

    Results are cached per URL via ``lru_cache`` (bounded by
    EMBEDDING_CACHE_SIZE) so the same URL is not re-downloaded and
    re-analyzed. NOTE: the decorator was previously missing even though the
    docstring promised caching and clear_face_embedding_cache() called
    ``cache_clear()``. Failed lookups raise and are therefore NOT cached,
    so transient download errors can be retried.

    Args:
        url: The URL of an image containing a face.

    Returns:
        The face object with embedding, or raises if extraction fails.

    Raises:
        ValueError: If the analyzer is unavailable, the download fails,
                    the image is invalid, or no face is detected.
    """
    global face_analyser
    if face_analyser is None:
        raise ValueError("Face analyzer not initialized")
    try:
        # Download the image from the URL
        response = requests.get(
            url,
            headers=DEFAULT_HEADERS,
            timeout=DOWNLOAD_TIMEOUT,
            allow_redirects=True
        )
        response.raise_for_status()
        # Convert downloaded bytes to an OpenCV BGR image
        arr = np.frombuffer(response.content, np.uint8)
        bgr_image = cv2.imdecode(arr, cv2.IMREAD_COLOR)
        if bgr_image is None or bgr_image.size == 0:
            raise ValueError("Downloaded image is empty or invalid")
        # Detect face in the image; use a permissive threshold since the
        # source image is expected to contain a face.
        with face_lock:
            set_detection_threshold(0.20)
            faces = face_analyser.get(bgr_image)
            # If no face found, retry on an enhanced copy
            if not faces:
                faces = face_analyser.get(enhance_image_for_detection(bgr_image))
        if not faces:
            raise ValueError("No faces detected in source image")
        # Return the best (largest) face
        return _pick_largest_face(faces)
    except requests.exceptions.RequestException as e:
        raise ValueError(f"Failed to download source image: {e}")
    except Exception as e:
        if DEBUG_MODE:
            print(f"Error analyzing source image: {e}")
            print(traceback.format_exc())
        raise ValueError(f"Failed to analyze source face: {e}")
def clear_face_embedding_cache() -> None:
    """
    Clear the cache of downloaded face embeddings.

    Use this to free memory or force re-downloading of source images.
    Defensive: ``cache_clear`` only exists when get_face_embedding_from_url
    is wrapped in ``functools.lru_cache``; calling it unconditionally raised
    AttributeError when the decorator was absent. If there is no cache,
    this is a no-op.
    """
    cache_clear = getattr(get_face_embedding_from_url, 'cache_clear', None)
    if cache_clear is not None:
        cache_clear()