| |
| |
| |
| |
|
|
| """ |
| Face detection and cropping utilities for 3D face reconstruction. |
| |
| This module provides functions for face detection, cropping, and preprocessing |
| to align faces with training data specifications. |
| """ |
|
|
| from typing import Tuple, Optional, Dict, Any |
| import numpy as np |
| import torch |
| from PIL import Image |
| from facenet_pytorch import MTCNN |
| from rembg import remove |
|
|
| |
# Face statistics taken from the training set. crop_face() rescales and shifts
# an input image so that its detected face matches these values.
# NOTE(review): presumably expressed in pixels of the 512x512 training images -- confirm.
TRAINING_SET_FACE_SIZE: float = 194.2749650813705
TRAINING_SET_FACE_CENTER: list = [251.83270369057132, 280.0133630862363]  # (x, y)

# Defaults used by the preprocessing entry points in this module.
# FACE_CENTER is deliberately the same list object as TRAINING_SET_FACE_CENTER.
FACE_SIZE: float = TRAINING_SET_FACE_SIZE
FACE_CENTER: list = TRAINING_SET_FACE_CENTER
DEFAULT_BACKGROUND_COLOR: tuple = (255, 255, 255)  # white, RGB
DEFAULT_IMG_SIZE: int = 512  # side length of the square output image
|
|
| |
# Run face detection on the GPU when CUDA is available, otherwise on the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Shared MTCNN instance, created once at import time and used as the default
# detector by crop_face(). The code visible in this module only calls
# FACE_DETECTOR.detect().
FACE_DETECTOR = MTCNN(
    device=DEVICE,
    image_size=512,
    margin=0,
    min_face_size=20,
    factor=0.709,
    thresholds=[0.6, 0.7, 0.7],
    post_process=True,
)
|
|
def select_face(
    detected_bounding_boxes: Optional[np.ndarray],
    confidence_scores: Optional[np.ndarray],
    min_confidence: float = 0.8,
) -> Optional[np.ndarray]:
    """
    Select the largest face among detections whose confidence exceeds a threshold.

    Args:
        detected_bounding_boxes: Detected bounding boxes in xyxy format, one
            row per face, or None when nothing was detected
        confidence_scores: Detection confidence probability for each bounding
            box, or None when nothing was detected
        min_confidence: A detection is considered only if its confidence is
            strictly greater than this value

    Returns:
        The qualifying bounding box with the largest area (the first one in
        case of a tie), or None if no suitable face was found
    """
    if detected_bounding_boxes is None or confidence_scores is None:
        return None

    # Pair every box with its score; entries without a score are skipped
    # rather than compared against the threshold.
    high_confidence_faces = [
        bbox
        for bbox, score in zip(detected_bounding_boxes, confidence_scores)
        if score is not None and score > min_confidence
    ]
    if not high_confidence_faces:
        return None

    # Area of an xyxy box: (y2 - y1) * (x2 - x1).
    return max(
        high_confidence_faces,
        key=lambda bbox: (bbox[3] - bbox[1]) * (bbox[2] - bbox[0]),
    )
|
|
def crop_face(
    input_image_array: np.ndarray,
    face_detector: MTCNN = FACE_DETECTOR,
    target_face_size: float = FACE_SIZE,
    target_face_center: list = FACE_CENTER,
    output_image_size: int = DEFAULT_IMG_SIZE,
    background_color: Tuple[int, int, int] = DEFAULT_BACKGROUND_COLOR
) -> Tuple[Image.Image, Dict[str, Any]]:
    """
    Crop and align face in image to match training data specifications.

    The image is rescaled so that the selected face has ``target_face_size``
    and is then placed on a square canvas so that the face center lands on
    ``target_face_center``. Canvas areas not covered by the rescaled image
    keep ``background_color``.

    Args:
        input_image_array: Input image as numpy array, either (H, W) or
            (H, W, C). Single-channel input is replicated to three channels,
            four-channel input is treated as RGBA and composited onto
            ``background_color``, and for any other input with at least three
            channels only the first three are used.
        face_detector: MTCNN face detector instance
        target_face_size: Target face size from training data
        target_face_center: Target face center (x, y) from training data
        output_image_size: Side length of the square output image
        background_color: Background color for padding and alpha compositing

    Returns:
        Tuple of (cropped_image, crop_parameters). ``crop_parameters`` contains
        ``resize_ratio`` (scale applied to the input image) and
        ``x_offset_left`` / ``y_offset_top`` (position of the scaled image's
        top-left corner on the output canvas; may be negative or fractional).

    Raises:
        ValueError: If the array shape is not a supported image shape, if no
            face is detected in the image, or if the detected bounding box has
            no extent
    """
    # Treat a 2-D array as a single-channel image so the checks below are uniform.
    if input_image_array.ndim == 2:
        input_image_array = input_image_array[:, :, np.newaxis]
    if input_image_array.ndim != 3:
        raise ValueError(
            f"Expected an image array of shape (H, W) or (H, W, C), got shape {input_image_array.shape}"
        )
    image_height, image_width, channel_count = input_image_array.shape

    # Normalize the input to a three-channel RGB array.
    if channel_count == 4:
        rgba_pil_image = Image.fromarray(input_image_array)
        background_image = Image.new("RGB", rgba_pil_image.size, background_color)
        rgb_composite_image = Image.alpha_composite(
            background_image.convert("RGBA"), rgba_pil_image
        ).convert("RGB")
        processed_image_array = np.array(rgb_composite_image)
    elif channel_count >= 3:
        processed_image_array = input_image_array[:, :, :3]
    elif channel_count == 1:
        processed_image_array = np.repeat(input_image_array, 3, axis=2)
    else:
        raise ValueError(f"Unsupported number of image channels: {channel_count}")

    # Detect faces and keep the one chosen by select_face().
    detected_bounding_boxes, confidence_scores = face_detector.detect(processed_image_array)
    selected_face_bbox = select_face(detected_bounding_boxes, confidence_scores)
    if selected_face_bbox is None:
        raise ValueError("No face detected in the image")

    # Face size is the mean of the box width and height; the center is the box midpoint.
    box_left, box_top, box_right, box_bottom = selected_face_bbox[:4]
    detected_face_size = 0.5 * (box_right - box_left + box_bottom - box_top)
    if detected_face_size <= 0:
        # Guard the division below against a degenerate detection.
        raise ValueError("Detected face bounding box has no extent")
    detected_face_center = (
        0.5 * (box_left + box_right),
        0.5 * (box_top + box_bottom),
    )

    # Rescale the whole image so the detected face matches the target size.
    scale_ratio = target_face_size / detected_face_size
    # Clamp to one pixel: PIL cannot resize to a zero-sized image.
    scaled_width = max(1, int(image_width * scale_ratio))
    scaled_height = max(1, int(image_height * scale_ratio))
    scaled_pil_image = Image.fromarray(processed_image_array).resize((scaled_width, scaled_height))
    scaled_face_center = (
        int(detected_face_center[0] * scale_ratio),
        int(detected_face_center[1] * scale_ratio),
    )

    # Canvas that receives the aligned image; uncovered areas stay background_color.
    output_image = Image.new("RGB", (output_image_size, output_image_size), color=background_color)

    # Translation that moves the scaled face center onto the target center.
    horizontal_offset = target_face_center[0] - scaled_face_center[0]
    vertical_offset = target_face_center[1] - scaled_face_center[1]

    # Part of the scaled image that falls inside the canvas after translation.
    crop_left_boundary = int(max(0, -horizontal_offset))
    crop_top_boundary = int(max(0, -vertical_offset))
    crop_right_boundary = int(min(scaled_width, output_image_size - horizontal_offset))
    crop_bottom_boundary = int(min(scaled_height, output_image_size - vertical_offset))

    # Paste the visible part at its translated position (clamped to the canvas origin).
    cropped_face_image = scaled_pil_image.crop(
        (crop_left_boundary, crop_top_boundary, crop_right_boundary, crop_bottom_boundary)
    )
    paste_coordinates = (int(max(0, horizontal_offset)), int(max(0, vertical_offset)))
    output_image.paste(cropped_face_image, paste_coordinates)

    crop_parameters = {
        'resize_ratio': scale_ratio,
        'x_offset_left': horizontal_offset,
        'y_offset_top': vertical_offset,
    }

    return output_image, crop_parameters
|
|
def prepare_foreground_with_rembg(input_image_array: np.ndarray) -> np.ndarray:
    """
    Prepare foreground image using rembg for background removal.

    Args:
        input_image_array: Input image as numpy array (H, W, C)

    Returns:
        RGBA image as numpy array (H, W, 4) with background removed
    """
    pil_image = Image.fromarray(input_image_array)
    background_removed_image = remove(pil_image)

    # Always hand back four channels: convert("RGBA") leaves RGBA pixel data
    # unchanged, gives RGB input a fully opaque alpha channel, and also
    # normalizes any other mode (e.g. grayscale) that the array-based channel
    # check could not handle.
    rgba_image = background_removed_image.convert("RGBA")
    return np.array(rgba_image)
|
|
def preprocess_image(
    original_image_array: np.ndarray,
    target_image_size: int = DEFAULT_IMG_SIZE,
    background_color: Tuple[int, int, int] = DEFAULT_BACKGROUND_COLOR
) -> Image.Image:
    """
    Preprocess image with background removal and face cropping.

    Args:
        original_image_array: Input image as numpy array
        target_image_size: Side length of the square output image
        background_color: Background color for compositing and padding

    Returns:
        Processed PIL Image (RGB) with the face aligned to the module's
        FACE_SIZE / FACE_CENTER targets

    Raises:
        ValueError: If no face is detected in the image (raised by crop_face)
    """
    foreground_image_array = prepare_foreground_with_rembg(original_image_array)

    # crop_face() composites four-channel (RGBA) input onto background_color
    # itself, so the foreground is passed through without flattening it here.
    # The crop parameters it also returns are not needed by this entry point.
    cropped_image, _ = crop_face(
        foreground_image_array,
        face_detector=FACE_DETECTOR,
        target_face_size=FACE_SIZE,
        target_face_center=FACE_CENTER,
        output_image_size=target_image_size,
        background_color=background_color,
    )
    return cropped_image
|
|
def preprocess_image_without_cropping(
    original_image_array: np.ndarray,
    target_image_size: int = DEFAULT_IMG_SIZE,
    background_color: Tuple[int, int, int] = DEFAULT_BACKGROUND_COLOR
) -> Image.Image:
    """
    Remove the background and flatten the image onto a solid color, without face cropping.

    The background-removed image is resized directly to a square of
    ``target_image_size`` pixels per side and then alpha-composited over
    ``background_color``.

    Args:
        original_image_array: Input image as numpy array
        target_image_size: Side length of the square output image
        background_color: Background color for compositing

    Returns:
        Processed PIL Image in RGB mode
    """
    foreground_array = prepare_foreground_with_rembg(original_image_array)

    canvas_size = (target_image_size, target_image_size)
    foreground = Image.fromarray(foreground_array).resize(canvas_size)
    backdrop = Image.new("RGBA", canvas_size, background_color)

    # Blend the foreground over the backdrop, then drop the alpha channel.
    flattened = Image.alpha_composite(backdrop, foreground)
    return flattened.convert("RGB")
|
|