""" Face Detection Module This module handles detecting faces in images and videos. It's responsible for finding where faces are in an image/video so we can swap them. For Non-Technical Developers: - Uses AI to find faces in photos and videos - Returns information about each face (location, confidence, landmarks like eyes/nose) - Handles difficult situations like poor lighting, small faces, or rotated heads - Caches results to avoid re-processing the same image multiple times """ import cv2 import numpy as np import threading import traceback import insightface import torch import onnxruntime as ort from functools import lru_cache from src.config import ( DEVICE, FACE_DETECTION_THRESHOLDS, MIN_FACE_CONFIDENCE, FACE_POSE_THRESHOLD, MIN_FACE_AREA_RATIO, EMBEDDING_CACHE_SIZE, ONNX_INTRA_OP_THREADS, DEBUG_MODE, DEFAULT_HEADERS, DOWNLOAD_TIMEOUT ) import requests import io # ==================== INITIALIZATION ==================== # Initialize the face analyzer (the AI model that detects faces) try: face_analyser = insightface.app.FaceAnalysis(name='buffalo_l') face_analyser.prepare( ctx_id=0 if DEVICE == 'cuda' else -1, # GPU if available, CPU otherwise det_size=(640, 640) # Detection resolution ) if DEBUG_MODE: print("✓ Face analyzer initialized successfully") except Exception as e: print(f"✗ CRITICAL: FaceAnalysis failed: {e}") face_analyser = None # Thread-safe locks (these prevent multiple threads from accessing faces at the same time) face_lock = threading.Lock() # Lock for using face_analyser detection_lock = threading.Lock() # Lock for detection operations # ==================== HELPER FUNCTIONS ==================== def set_detection_threshold(thresh: float) -> None: """ Adjust how confident the face detector needs to be to report a face. 
Threshold explanation: - 0.99 = Only report VERY confident detections (might miss some faces) - 0.50 = Report medium-confidence detections - 0.05 = Report even uncertain detections (might have false positives) Args: thresh: Confidence threshold (0.0 to 1.0) """ global face_analyser if face_analyser is None: return try: # The face analyzer stores its detection model somewhere # We try different possible locations if hasattr(face_analyser, 'det_model'): face_analyser.det_model.det_thresh = thresh elif hasattr(face_analyser, 'models') and 'detection' in face_analyser.models: face_analyser.models['detection'].det_thresh = thresh except Exception as e: if DEBUG_MODE: print(f"Warning: Could not set detection threshold: {e}") def enhance_image_for_detection(bgr_image: np.ndarray) -> np.ndarray: """ Improve image quality to help face detection work better. This is like putting on glasses to see better! It enhances contrast and sharpness so faces are easier to spot, especially in poor lighting. Technique explanation: - CLAHE: Makes dark areas lighter and light areas darker (better contrast) - Sharpening filter: Makes edges more crisp and defined Args: bgr_image: The image to enhance (in BGR color format) Returns: Enhanced image that's easier to detect faces in """ if bgr_image is None or bgr_image.size == 0: return bgr_image # Step 1: Convert from BGR (what OpenCV uses) to LAB (better for contrast) lab = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2LAB) l, a, b = cv2.split(lab) # Step 2: Apply CLAHE (Contrast Limited Adaptive Histogram Equalization) # This is a fancy way of saying "make the image more contrasty" clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8)) l = clahe.apply(l) # Step 3: Convert back to BGR enhanced = cv2.cvtColor(cv2.merge([l, a, b]), cv2.COLOR_LAB2BGR) # Step 4: Apply sharpening filter (makes edges stand out) sharpening_kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]]) sharpened = cv2.filter2D(enhanced, -1, sharpening_kernel) return sharpened 
def _pick_largest_face(faces: list) -> object:
    """
    If there are multiple faces, return the biggest one.

    We assume the biggest face is the most important/relevant one.

    Args:
        faces: List of detected face objects (each carries a ``bbox``)

    Returns:
        The face whose bounding box has the largest area, or None if the
        list is empty.
    """
    if not faces:
        return None
    # max() with a key is O(n); the original sorted the whole list just to
    # take the first element, which is O(n log n) for no benefit.
    return max(
        faces,
        key=lambda face: (face.bbox[2] - face.bbox[0]) * (face.bbox[3] - face.bbox[1])
    )


def is_face_landmark_valid(face: object, frame_shape: tuple, min_confidence: float) -> bool:
    """
    Check if a detected face's landmarks (eyes, nose, mouth, etc.) are valid.

    Important for face-swap methods that rely on facial features. A valid
    face has enough visible landmarks, is not rotated too much, has a
    reasonable confidence score, and is large enough to process.

    Args:
        face: The detected face object
        frame_shape: Shape of the frame (height, width, channels)
        min_confidence: Minimum detection confidence required

    Returns:
        True if the face's landmarks look good, False otherwise.
    """
    if face is None:
        return False

    h, w = frame_shape[:2]

    # Check 1: the face must carry at least the 5 standard keypoints.
    kps = getattr(face, 'kps', None)  # kps = keypoints
    if kps is None or len(kps) < 5:
        return False

    # Check 2: every landmark must lie inside the frame (small margin allowed).
    MARGIN = 10  # pixels
    for x, y in kps:
        if not (-MARGIN <= x <= w + MARGIN and -MARGIN <= y <= h + MARGIN):
            return False  # Landmark is way outside the frame

    # Check 3: the face must cover a minimum fraction of the frame
    # (fraction is MIN_FACE_AREA_RATIO from config).
    bx1, by1, bx2, by2 = face.bbox
    face_area = (bx2 - bx1) * (by2 - by1)
    frame_area = w * h
    if face_area < MIN_FACE_AREA_RATIO * frame_area:
        return False

    # Check 4: the head must be mostly facing the camera.
    # NOTE(review): pose[2] is treated as roll here; confirm against the
    # insightface pose ordering — the original comment called this
    # "rotation left-to-right", which describes yaw, not roll.
    pose = getattr(face, 'pose', None)  # yaw, pitch, roll angles
    if pose is not None and len(pose) >= 3:
        head_roll = pose[2]
        if abs(head_roll) > FACE_POSE_THRESHOLD:
            return False  # Head is tilted too much

    # Check 5: detection confidence must clear the caller's bar.
    det_score = getattr(face, 'det_score', 1.0)
    if det_score < min_confidence:
        return False

    return True


# ==================== FACE DETECTION FUNCTIONS ====================

def detect_faces_in_image(bgr_image: np.ndarray, threshold: float = MIN_FACE_CONFIDENCE) -> list:
    """
    Detect all faces in an image.

    Args:
        bgr_image: The image to search for faces (BGR format from OpenCV)
        threshold: How confident the detector needs to be (0.0-1.0)

    Returns:
        List of detected face objects (empty list if no faces found or on
        any detection error).
    """
    global face_analyser
    if face_analyser is None:
        return []
    if bgr_image is None or bgr_image.size == 0:
        return []

    try:
        with face_lock:
            set_detection_threshold(threshold)
            faces = face_analyser.get(bgr_image)
            # If no faces found, retry on an enhanced copy — helps with
            # poor lighting and low contrast.
            if not faces:
                enhanced = enhance_image_for_detection(bgr_image)
                faces = face_analyser.get(enhanced)
            return faces
    except Exception as e:
        if DEBUG_MODE:
            print(f"Error during face detection: {e}")
            print(traceback.format_exc())
        return []


def get_best_face_in_image(bgr_image: np.ndarray, threshold: float = MIN_FACE_CONFIDENCE) -> object:
    """
    Find the most prominent face in an image.

    "Prominent" means the largest/most visible face — probably the main
    subject.

    Args:
        bgr_image: The image to search
        threshold: Detection confidence threshold (0.0-1.0)

    Returns:
        The face object for the largest face, or None if no faces found.
    """
    faces = detect_faces_in_image(bgr_image, threshold)
    return _pick_largest_face(faces)


def detect_faces_with_multiscale(bgr_image: np.ndarray) -> object:
    """
    Try to detect faces using multiple confidence thresholds.

    Some faces are hard to detect — in shadow, turned away, or small.
    This tries progressively lower confidence thresholds to find them.

    Args:
        bgr_image: The image to search

    Returns:
        The largest face found at any confidence level, or None.
    """
    for threshold in FACE_DETECTION_THRESHOLDS:
        face = get_best_face_in_image(bgr_image, threshold)
        # FIX: explicit None check — insightface Face objects are
        # dict-derived, so plain truthiness is not a reliable presence test.
        if face is not None:
            return face
    return None


@lru_cache(maxsize=EMBEDDING_CACHE_SIZE)
def get_face_embedding_from_url(url: str) -> object:
    """
    Download an image from URL and get the face embedding (fingerprint).

    A face embedding is like a fingerprint of a face — it identifies the
    person's face. We use it to know which face to swap FROM.

    Successful results are cached per-URL so we don't re-download and
    re-analyze the same image. Failures raise and are therefore NOT
    cached, so transient errors can be retried.

    Args:
        url: The URL of an image containing a face

    Returns:
        The face object with embedding.

    Raises:
        ValueError: If the analyzer is unavailable, the download fails,
            the image is invalid, no face is found, or analysis fails.
    """
    global face_analyser
    if face_analyser is None:
        raise ValueError("Face analyzer not initialized")

    try:
        # Download the image from the URL
        response = requests.get(
            url,
            headers=DEFAULT_HEADERS,
            timeout=DOWNLOAD_TIMEOUT,
            allow_redirects=True
        )
        response.raise_for_status()

        # Convert downloaded bytes to a BGR image
        arr = np.frombuffer(response.content, np.uint8)
        bgr_image = cv2.imdecode(arr, cv2.IMREAD_COLOR)
        if bgr_image is None or bgr_image.size == 0:
            raise ValueError("Downloaded image is empty or invalid")

        # Detect the face. Use a permissive threshold: the source image is
        # user-chosen, so we'd rather accept an uncertain detection than
        # fail outright.
        with face_lock:
            set_detection_threshold(0.20)
            faces = face_analyser.get(bgr_image)
            # If no face found, retry on an enhanced copy
            if not faces:
                faces = face_analyser.get(enhance_image_for_detection(bgr_image))

        if not faces:
            raise ValueError("No faces detected in source image")

        # Return the best (largest) face
        return _pick_largest_face(faces)

    except requests.exceptions.RequestException as e:
        # Chain the cause so the original network error stays visible.
        raise ValueError(f"Failed to download source image: {e}") from e
    except Exception as e:
        if DEBUG_MODE:
            print(f"Error analyzing source image: {e}")
            print(traceback.format_exc())
        # BUG FIX: the original split this into a bare "raise" followed by
        # an unreachable "ValueError(...)" expression, so the wrapper
        # exception was never actually raised. Raise it properly, chained.
        raise ValueError(f"Failed to analyze source face: {e}") from e


def clear_face_embedding_cache() -> None:
    """
    Clear the cache of downloaded face embeddings.

    Use this when you want to free up memory or refresh the cache.
    """
    get_face_embedding_from_url.cache_clear()