Spaces:
Running
Running
| """ | |
| Face Detection Module | |
| This module handles detecting faces in images and videos. | |
| It's responsible for finding where faces are in an image/video so we can swap them. | |
| For Non-Technical Developers: | |
| - Uses AI to find faces in photos and videos | |
| - Returns information about each face (location, confidence, landmarks like eyes/nose) | |
| - Handles difficult situations like poor lighting, small faces, or rotated heads | |
| - Caches results to avoid re-processing the same image multiple times | |
| """ | |
| import cv2 | |
| import numpy as np | |
| import threading | |
| import traceback | |
| import insightface | |
| import torch | |
| import onnxruntime as ort | |
| from functools import lru_cache | |
| from src.config import ( | |
| DEVICE, FACE_DETECTION_THRESHOLDS, MIN_FACE_CONFIDENCE, | |
| FACE_POSE_THRESHOLD, MIN_FACE_AREA_RATIO, EMBEDDING_CACHE_SIZE, | |
| ONNX_INTRA_OP_THREADS, DEBUG_MODE, DEFAULT_HEADERS, DOWNLOAD_TIMEOUT | |
| ) | |
| import requests | |
| import io | |
# ==================== INITIALIZATION ====================
# Initialize the face analyzer (the InsightFace 'buffalo_l' model bundle).
# NOTE: this runs at import time. On failure `face_analyser` is set to None
# and the functions below degrade gracefully (return []/None or raise).
try:
    face_analyser = insightface.app.FaceAnalysis(name='buffalo_l')
    face_analyser.prepare(
        ctx_id=0 if DEVICE == 'cuda' else -1,  # GPU if available, CPU otherwise
        det_size=(640, 640)  # Detection resolution
    )
    if DEBUG_MODE:
        print("✓ Face analyzer initialized successfully")
except Exception as e:
    # Always report this, even outside DEBUG_MODE — nothing downstream works
    # without the analyzer.
    print(f"✗ CRITICAL: FaceAnalysis failed: {e}")
    face_analyser = None

# Thread-safe locks (prevent multiple threads from using shared state at once)
face_lock = threading.Lock()       # Guards every call into face_analyser
detection_lock = threading.Lock()  # Guards detection operations
| # ==================== HELPER FUNCTIONS ==================== | |
def set_detection_threshold(thresh: float) -> None:
    """
    Adjust how confident the face detector must be before reporting a face.

    Higher values (e.g. 0.99) keep only very confident detections and may
    miss faces; lower values (e.g. 0.05) keep uncertain detections and may
    introduce false positives.

    Args:
        thresh: Confidence threshold in the range 0.0 to 1.0.
    """
    global face_analyser
    if face_analyser is None:
        return
    try:
        # Different insightface versions expose the detector under different
        # attributes, so probe both known layouts before giving up.
        if hasattr(face_analyser, 'det_model'):
            face_analyser.det_model.det_thresh = thresh
        elif hasattr(face_analyser, 'models') and 'detection' in face_analyser.models:
            face_analyser.models['detection'].det_thresh = thresh
    except Exception as e:
        # Best-effort: failing to tune the threshold should never crash detection.
        if DEBUG_MODE:
            print(f"Warning: Could not set detection threshold: {e}")
def enhance_image_for_detection(bgr_image: np.ndarray) -> np.ndarray:
    """
    Preprocess an image so faces are easier for the detector to find.

    Two enhancements are applied:
      1. CLAHE on the LAB lightness channel — boosts local contrast, which
         helps with faces in poor lighting.
      2. A 3x3 center-weighted sharpening convolution — crisps up edges.

    Args:
        bgr_image: The image to enhance (BGR color format, as used by OpenCV).

    Returns:
        The enhanced BGR image, or the input unchanged when it is None/empty.
    """
    if bgr_image is None or bgr_image.size == 0:
        return bgr_image

    # Work in LAB so only lightness is equalized and colors stay untouched.
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2LAB))
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    lightness = equalizer.apply(lightness)
    contrasted = cv2.cvtColor(cv2.merge([lightness, chan_a, chan_b]), cv2.COLOR_LAB2BGR)

    # Sharpen edges with the standard 5-center kernel.
    sharpen = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    return cv2.filter2D(contrasted, -1, sharpen)
def _pick_largest_face(faces: list) -> object:
    """
    Return the face with the largest bounding-box area.

    We assume the biggest face is the most important/relevant one (the main
    subject of the image).

    Args:
        faces: List of detected face objects, each exposing a ``bbox`` of
               ``[x1, y1, x2, y2]``.

    Returns:
        The largest face object, or None if the list is empty.
    """
    if not faces:
        return None
    # max() is O(n); the previous full sort just to take the first element
    # was O(n log n). Ties resolve to the first occurrence, same as a stable
    # reverse sort.
    return max(
        faces,
        key=lambda face: (face.bbox[2] - face.bbox[0]) * (face.bbox[3] - face.bbox[1]),
    )
def is_face_landmark_valid(face: object, frame_shape: tuple, min_confidence: float) -> bool:
    """
    Validate a detected face's landmarks (eyes, nose, mouth corners).

    A face passes only when all of the following hold:
      - it carries a full 5-point landmark set,
      - every landmark lies inside the frame (with a small pixel margin),
      - its bounding box covers at least MIN_FACE_AREA_RATIO of the frame,
      - its head roll is within FACE_POSE_THRESHOLD degrees,
      - its detection score meets ``min_confidence``.

    Args:
        face: The detected face object (may be None).
        frame_shape: Frame shape as (height, width[, channels]).
        min_confidence: Minimum detection confidence required.

    Returns:
        True when the face looks usable for landmark-based processing.
    """
    if face is None:
        return False

    height, width = frame_shape[:2]
    margin = 10  # pixels of tolerance around the frame edges

    # Require the full 5-point set (both eyes, nose tip, both mouth corners).
    keypoints = getattr(face, 'kps', None)
    if keypoints is None or len(keypoints) < 5:
        return False

    # All landmarks must sit within the frame, allowing the margin.
    if not all(
        -margin <= x <= width + margin and -margin <= y <= height + margin
        for x, y in keypoints
    ):
        return False

    # The face box must cover a minimum share of the frame.
    x1, y1, x2, y2 = face.bbox
    if (x2 - x1) * (y2 - y1) < MIN_FACE_AREA_RATIO * (width * height):
        return False

    # Reject heads rolled beyond the configured threshold.
    pose = getattr(face, 'pose', None)  # (yaw, pitch, roll) angles
    if pose is not None and len(pose) >= 3 and abs(pose[2]) > FACE_POSE_THRESHOLD:
        return False

    # Finally, the detector itself must be confident enough.
    return getattr(face, 'det_score', 1.0) >= min_confidence
| # ==================== FACE DETECTION FUNCTIONS ==================== | |
def detect_faces_in_image(bgr_image: np.ndarray, threshold: float = MIN_FACE_CONFIDENCE) -> list:
    """
    Detect all faces in an image.

    If the first pass finds nothing, a second pass runs on a
    contrast-enhanced copy of the image (see enhance_image_for_detection).

    Args:
        bgr_image: Image to search (BGR format from OpenCV).
        threshold: Required detector confidence (0.0-1.0).

    Returns:
        List of detected face objects; empty list when no face is found,
        the analyzer is unavailable, the input is unusable, or an error occurs.
    """
    global face_analyser
    # Bail out early when the model failed to load or the input is empty.
    if face_analyser is None:
        return []
    if bgr_image is None or bgr_image.size == 0:
        return []
    try:
        # Serialize all analyzer access — the model is shared across threads.
        with face_lock:
            set_detection_threshold(threshold)
            detected = face_analyser.get(bgr_image)
            if detected:
                return detected
            # Retry once on an enhanced image; helps in poor lighting.
            return face_analyser.get(enhance_image_for_detection(bgr_image))
    except Exception as e:
        if DEBUG_MODE:
            print(f"Error during face detection: {e}")
            print(traceback.format_exc())
        return []
def get_best_face_in_image(bgr_image: np.ndarray, threshold: float = MIN_FACE_CONFIDENCE) -> object:
    """
    Find the most prominent face in an image.

    "Prominent" means the largest face by bounding-box area — typically the
    main subject of the photo.

    Args:
        bgr_image: The image to search.
        threshold: Detection confidence threshold (0.0-1.0).

    Returns:
        The largest detected face object, or None when no face is found.
    """
    return _pick_largest_face(detect_faces_in_image(bgr_image, threshold))
def detect_faces_with_multiscale(bgr_image: np.ndarray) -> object:
    """
    Detect a face by trying progressively lower confidence thresholds.

    Hard-to-detect faces (shadowed, rotated, small) may only surface at a
    low threshold, so each value in FACE_DETECTION_THRESHOLDS is tried in
    order until one yields a face.

    Args:
        bgr_image: The image to search.

    Returns:
        The largest face found at the first successful threshold, or None.
    """
    for threshold in FACE_DETECTION_THRESHOLDS:
        face = get_best_face_in_image(bgr_image, threshold)
        # Compare against None explicitly: insightface Face objects are
        # dict-like, so a falsy-but-valid face would be wrongly skipped by
        # a bare truthiness test.
        if face is not None:
            return face
    return None
@lru_cache(maxsize=EMBEDDING_CACHE_SIZE)
def get_face_embedding_from_url(url: str) -> object:
    """
    Download an image from a URL and extract the best face (with embedding).

    A face embedding is like a fingerprint of a face — it uniquely identifies
    the person's face. We use it to know which face to swap FROM.

    Results are cached per URL via ``lru_cache`` (bounded by
    EMBEDDING_CACHE_SIZE) so the same URL is not re-downloaded and
    re-analyzed. NOTE: the decorator was previously missing even though the
    docstring promised caching and clear_face_embedding_cache() called
    ``cache_clear()``. Failed lookups raise and are therefore NOT cached,
    so transient download errors can be retried.

    Args:
        url: The URL of an image containing a face.

    Returns:
        The face object with embedding, or raises if extraction fails.

    Raises:
        ValueError: If the analyzer is unavailable, the download fails,
                    the image is invalid, or no face is detected.
    """
    global face_analyser
    if face_analyser is None:
        raise ValueError("Face analyzer not initialized")
    try:
        # Download the image from the URL
        response = requests.get(
            url,
            headers=DEFAULT_HEADERS,
            timeout=DOWNLOAD_TIMEOUT,
            allow_redirects=True
        )
        response.raise_for_status()
        # Convert downloaded bytes to an OpenCV BGR image
        arr = np.frombuffer(response.content, np.uint8)
        bgr_image = cv2.imdecode(arr, cv2.IMREAD_COLOR)
        if bgr_image is None or bgr_image.size == 0:
            raise ValueError("Downloaded image is empty or invalid")
        # Detect face in the image; use a permissive threshold since the
        # source image is expected to contain a face.
        with face_lock:
            set_detection_threshold(0.20)
            faces = face_analyser.get(bgr_image)
            # If no face found, retry on an enhanced copy
            if not faces:
                faces = face_analyser.get(enhance_image_for_detection(bgr_image))
        if not faces:
            raise ValueError("No faces detected in source image")
        # Return the best (largest) face
        return _pick_largest_face(faces)
    except requests.exceptions.RequestException as e:
        raise ValueError(f"Failed to download source image: {e}")
    except Exception as e:
        if DEBUG_MODE:
            print(f"Error analyzing source image: {e}")
            print(traceback.format_exc())
        raise ValueError(f"Failed to analyze source face: {e}")
def clear_face_embedding_cache() -> None:
    """
    Clear the cache of downloaded face embeddings.

    Use this to free memory or force re-downloading of source images.
    Defensive: ``cache_clear`` only exists when get_face_embedding_from_url
    is wrapped in ``functools.lru_cache``; calling it unconditionally raised
    AttributeError when the decorator was absent. If there is no cache,
    this is a no-op.
    """
    cache_clear = getattr(get_face_embedding_from_url, 'cache_clear', None)
    if cache_clear is not None:
        cache_clear()