SwapMe / src /face_detection.py
Help
V1 with multifile support, IG support, and clean code
4ff9d22
"""
Face Detection Module
This module handles detecting faces in images and videos.
It's responsible for finding where faces are in an image/video so we can swap them.
For Non-Technical Developers:
- Uses AI to find faces in photos and videos
- Returns information about each face (location, confidence, landmarks like eyes/nose)
- Handles difficult situations like poor lighting, small faces, or rotated heads
- Caches results to avoid re-processing the same image multiple times
"""
import cv2
import numpy as np
import threading
import traceback
import insightface
import torch
import onnxruntime as ort
from functools import lru_cache
from src.config import (
DEVICE, FACE_DETECTION_THRESHOLDS, MIN_FACE_CONFIDENCE,
FACE_POSE_THRESHOLD, MIN_FACE_AREA_RATIO, EMBEDDING_CACHE_SIZE,
ONNX_INTRA_OP_THREADS, DEBUG_MODE, DEFAULT_HEADERS, DOWNLOAD_TIMEOUT
)
import requests
import io
# ==================== INITIALIZATION ====================
# Initialize the face analyzer (the AI model that detects faces).
# NOTE: this runs at import time, so importing this module loads the model.
try:
    # 'buffalo_l' is the insightface model pack used for detection/analysis.
    face_analyser = insightface.app.FaceAnalysis(name='buffalo_l')
    face_analyser.prepare(
        ctx_id=0 if DEVICE == 'cuda' else -1,  # GPU if available, CPU otherwise
        det_size=(640, 640)  # Detection resolution
    )
    if DEBUG_MODE:
        print("✓ Face analyzer initialized successfully")
except Exception as e:
    # Model load failed: leave a None sentinel. Every function below checks
    # for None and either no-ops or raises, so the module still imports.
    print(f"✗ CRITICAL: FaceAnalysis failed: {e}")
    face_analyser = None
# Thread-safe locks (these prevent multiple threads from accessing faces at the same time)
face_lock = threading.Lock()  # Lock for using face_analyser
detection_lock = threading.Lock()  # Lock for detection operations
# ==================== HELPER FUNCTIONS ====================
def set_detection_threshold(thresh: float) -> None:
    """
    Set how confident the detector must be before it reports a face.

    Rough guide:
    - 0.99: only near-certain detections (may miss faces)
    - 0.50: medium-confidence detections
    - 0.05: report almost anything (risk of false positives)

    Silently does nothing if the analyzer failed to initialize or if the
    detection model cannot be located on this insightface version.

    Args:
        thresh: Confidence threshold in [0.0, 1.0].
    """
    if face_analyser is None:
        return
    try:
        # Different insightface versions expose the detector in different
        # places, so probe both known attribute layouts.
        detector = getattr(face_analyser, 'det_model', None)
        if detector is None:
            models = getattr(face_analyser, 'models', None)
            if models is not None and 'detection' in models:
                detector = models['detection']
        if detector is not None:
            detector.det_thresh = thresh
    except Exception as e:
        if DEBUG_MODE:
            print(f"Warning: Could not set detection threshold: {e}")
def enhance_image_for_detection(bgr_image: np.ndarray) -> np.ndarray:
    """
    Boost contrast and sharpness so faces are easier to detect.

    Pipeline:
    1. Convert BGR -> LAB and run CLAHE on the L (lightness) channel,
       which evens out contrast without distorting colors.
    2. Convert back to BGR.
    3. Apply a 3x3 sharpening kernel to crisp up edges.

    Args:
        bgr_image: Image in OpenCV's BGR layout.

    Returns:
        The enhanced image, or the input unchanged if it is None/empty.
    """
    if bgr_image is None or bgr_image.size == 0:
        return bgr_image

    # CLAHE on the lightness channel only (LAB keeps color channels separate).
    luminance, chroma_a, chroma_b = cv2.split(
        cv2.cvtColor(bgr_image, cv2.COLOR_BGR2LAB)
    )
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    luminance = equalizer.apply(luminance)
    contrast_boosted = cv2.cvtColor(
        cv2.merge([luminance, chroma_a, chroma_b]), cv2.COLOR_LAB2BGR
    )

    # Classic sharpen: center-weighted kernel emphasizes edges.
    sharpen_kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    return cv2.filter2D(contrast_boosted, -1, sharpen_kernel)
def _pick_largest_face(faces: list) -> object:
"""
If there are multiple faces, return the biggest one.
We assume the biggest face is the most important/relevant one.
Args:
faces: List of detected faces
Returns:
The largest face object, or None if no faces
"""
if not faces:
return None
# Calculate area of each face and return the one with largest area
return sorted(
faces,
key=lambda face: (face.bbox[2] - face.bbox[0]) * (face.bbox[3] - face.bbox[1]),
reverse=True
)[0]
def is_face_landmark_valid(face: object, frame_shape: tuple, min_confidence: float) -> bool:
    """
    Decide whether a detected face is usable for landmark-based swapping.

    A face passes only if ALL of the following hold:
    - it carries at least the 5 standard keypoints (eyes, nose, mouth corners)
    - every keypoint lies inside the frame (with a small pixel tolerance)
    - its bounding box covers at least MIN_FACE_AREA_RATIO of the frame
    - its head roll is within FACE_POSE_THRESHOLD degrees
    - its detection score meets ``min_confidence``

    Args:
        face: Detected face object (or None).
        frame_shape: Frame shape tuple (height, width, ...).
        min_confidence: Minimum detection score required.

    Returns:
        True if the face's landmarks look good, False otherwise.
    """
    if face is None:
        return False
    frame_h, frame_w = frame_shape[:2]

    # Need the full 5-point landmark set to proceed.
    keypoints = getattr(face, 'kps', None)
    if keypoints is None or len(keypoints) < 5:
        return False

    # Every landmark must sit inside the frame, give or take a small margin.
    margin = 10  # pixels of tolerance outside the frame edge
    all_inside = all(
        -margin <= px <= frame_w + margin and -margin <= py <= frame_h + margin
        for px, py in keypoints
    )
    if not all_inside:
        return False

    # The face box must occupy a minimum fraction of the frame area.
    x1, y1, x2, y2 = face.bbox
    if (x2 - x1) * (y2 - y1) < MIN_FACE_AREA_RATIO * (frame_w * frame_h):
        return False

    # Reject heads rolled (tilted) beyond the configured threshold.
    # pose layout assumed (yaw, pitch, roll) — matches original comment.
    pose = getattr(face, 'pose', None)
    if pose is not None and len(pose) >= 3 and abs(pose[2]) > FACE_POSE_THRESHOLD:
        return False

    # Finally, the detector itself must be confident enough.
    # Missing score defaults to 1.0 (treated as fully confident).
    return getattr(face, 'det_score', 1.0) >= min_confidence
# ==================== FACE DETECTION FUNCTIONS ====================
def detect_faces_in_image(bgr_image: np.ndarray, threshold: float = MIN_FACE_CONFIDENCE) -> list:
    """
    Detect every face in an image.

    Runs a first pass on the raw image; if nothing is found, retries once on
    a contrast/sharpness-boosted copy. All analyzer access is serialized
    through ``face_lock``.

    Args:
        bgr_image: Image to scan (OpenCV BGR format).
        threshold: Required detector confidence (0.0-1.0).

    Returns:
        List of detected face objects; empty list on no faces or any error.
    """
    if face_analyser is None:
        return []
    if bgr_image is None or bgr_image.size == 0:
        return []
    try:
        with face_lock:
            set_detection_threshold(threshold)
            detected = face_analyser.get(bgr_image)
            if detected:
                return detected
            # Nothing found: one retry on the enhanced image.
            return face_analyser.get(enhance_image_for_detection(bgr_image))
    except Exception as e:
        if DEBUG_MODE:
            print(f"Error during face detection: {e}")
            print(traceback.format_exc())
        return []
def get_best_face_in_image(bgr_image: np.ndarray, threshold: float = MIN_FACE_CONFIDENCE) -> object:
    """
    Return the most prominent (largest) face in an image.

    The largest detection is treated as the main subject.

    Args:
        bgr_image: Image to scan (OpenCV BGR format).
        threshold: Detection confidence threshold (0.0-1.0).

    Returns:
        The largest detected face object, or None when nothing is found.
    """
    return _pick_largest_face(detect_faces_in_image(bgr_image, threshold))
def detect_faces_with_multiscale(bgr_image: np.ndarray) -> object:
    """
    Detect a face by trying progressively lower confidence thresholds.

    Hard cases (shadow, small faces, turned heads) may only show up at a
    lower threshold, so we walk FACE_DETECTION_THRESHOLDS in order and stop
    at the first hit.

    Args:
        bgr_image: Image to scan (OpenCV BGR format).

    Returns:
        The largest face found at the first succeeding threshold, or None.
    """
    for threshold in FACE_DETECTION_THRESHOLDS:
        face = get_best_face_in_image(bgr_image, threshold)
        # Explicit None check instead of truthiness: insightface Face objects
        # are dict-like, so a genuine detection with no populated mapping keys
        # would evaluate falsy and be wrongly skipped.
        if face is not None:
            return face
    return None
@lru_cache(maxsize=EMBEDDING_CACHE_SIZE)
def get_face_embedding_from_url(url: str) -> object:
    """
    Download an image from a URL and return the best face found in it.

    A face embedding is like a fingerprint of a face — it identifies the
    person we will swap FROM. Successful results are cached per URL (up to
    EMBEDDING_CACHE_SIZE entries); lru_cache does not cache raised
    exceptions, so transient download failures are retried on the next call.

    Args:
        url: URL of an image containing a face.

    Returns:
        The largest detected face object (with embedding).

    Raises:
        ValueError: If the analyzer is unavailable, the download fails, the
                    bytes do not decode to an image, no face is found, or
                    analysis fails unexpectedly.
    """
    if face_analyser is None:
        raise ValueError("Face analyzer not initialized")
    try:
        # Download the raw image bytes.
        response = requests.get(
            url,
            headers=DEFAULT_HEADERS,
            timeout=DOWNLOAD_TIMEOUT,
            allow_redirects=True
        )
        response.raise_for_status()

        # Decode bytes into a BGR image.
        arr = np.frombuffer(response.content, np.uint8)
        bgr_image = cv2.imdecode(arr, cv2.IMREAD_COLOR)
        if bgr_image is None or bgr_image.size == 0:
            raise ValueError("Downloaded image is empty or invalid")

        # Detect faces (low fixed threshold: source images are usually clean
        # portraits, so we favor recall here).
        with face_lock:
            set_detection_threshold(0.20)
            faces = face_analyser.get(bgr_image)
            if not faces:
                # One retry on an enhanced copy.
                faces = face_analyser.get(enhance_image_for_detection(bgr_image))
        if not faces:
            raise ValueError("No faces detected in source image")
        return _pick_largest_face(faces)
    except requests.exceptions.RequestException as e:
        raise ValueError(f"Failed to download source image: {e}") from e
    except ValueError:
        # Already a well-formed domain error (invalid image / no face):
        # re-raise as-is instead of wrapping the message a second time.
        raise
    except Exception as e:
        if DEBUG_MODE:
            print(f"Error analyzing source image: {e}")
            print(traceback.format_exc())
        raise ValueError(f"Failed to analyze source face: {e}") from e
def clear_face_embedding_cache() -> None:
    """
    Drop all cached URL -> face results.

    Call this to free memory, or to force a re-download after a remote
    source image has changed.
    """
    get_face_embedding_from_url.cache_clear()