|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
""" |
|
|
Face detection and cropping utilities for 3D face reconstruction. |
|
|
|
|
|
This module provides functions for face detection, cropping, and preprocessing |
|
|
to align faces with training data specifications. |
|
|
""" |
|
|
|
|
|
from typing import Tuple, Optional, Dict, Any |
|
|
import numpy as np |
|
|
import torch |
|
|
from PIL import Image |
|
|
from facenet_pytorch import MTCNN |
|
|
from rembg import remove |
|
|
|
|
|
|
|
|
# Face statistics measured over the training set; new inputs are rescaled and
# recentred so their detected face matches these values.
TRAINING_SET_FACE_SIZE = 194.2749650813705
# Target face centre as [x, y] in output-image pixel coordinates.
TRAINING_SET_FACE_CENTER = [251.83270369057132, 280.0133630862363]

# Aliases used as the defaults throughout this module.
FACE_SIZE = TRAINING_SET_FACE_SIZE
FACE_CENTER = TRAINING_SET_FACE_CENTER
# White background used when compositing RGBA foregrounds and padding crops.
DEFAULT_BACKGROUND_COLOR = (255, 255, 255)
# Square output resolution (pixels).
DEFAULT_IMG_SIZE = 512

# Run detection on GPU when available, otherwise CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Module-level MTCNN detector, constructed once at import time.
# NOTE(review): thresholds/factor are the facenet-pytorch defaults for the
# three cascade stages — confirm against facenet_pytorch.MTCNN docs.
FACE_DETECTOR = MTCNN(
    image_size=512,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
    device=DEVICE
)
|
|
|
|
|
def select_face(
    detected_bounding_boxes: Optional[np.ndarray],
    confidence_scores: Optional[np.ndarray],
    confidence_threshold: float = 0.8,
) -> Optional[np.ndarray]:
    """
    Select the largest face from detected faces with confidence above threshold.

    Args:
        detected_bounding_boxes: Detected bounding boxes in xyxy format
        confidence_scores: Detection confidence probabilities
        confidence_threshold: Minimum confidence for a detection to be
            considered (default 0.8, matching the previous hard-coded value)

    Returns:
        Selected bounding box or None if no suitable face found
    """
    # MTCNN returns (None, None) when it finds no faces at all.
    if detected_bounding_boxes is None or confidence_scores is None:
        return None

    # Keep only confident detections; iterate boxes and scores in lockstep.
    high_confidence_faces = [
        bbox
        for bbox, score in zip(detected_bounding_boxes, confidence_scores)
        if score > confidence_threshold
    ]

    if not high_confidence_faces:
        return None

    # Pick the detection with the largest bounding-box area.
    return max(high_confidence_faces, key=lambda bbox: (bbox[3] - bbox[1]) * (bbox[2] - bbox[0]))
|
|
|
|
|
def crop_face(
    input_image_array: np.ndarray,
    face_detector: MTCNN = FACE_DETECTOR,
    target_face_size: float = FACE_SIZE,
    target_face_center: list = FACE_CENTER,
    output_image_size: int = DEFAULT_IMG_SIZE,
    background_color: Tuple[int, int, int] = DEFAULT_BACKGROUND_COLOR
) -> Tuple[Image.Image, Dict[str, Any]]:
    """
    Crop and align face in image to match training data specifications.

    The image is rescaled so the detected face matches ``target_face_size``,
    then translated so the face lands on ``target_face_center`` inside a
    square ``output_image_size`` canvas; uncovered areas are filled with
    ``background_color``.

    Args:
        input_image_array: Input image as numpy array (H, W, C); 4-channel
            input is composited onto ``background_color`` before detection
        face_detector: MTCNN face detector instance
        target_face_size: Target face size from training data
        target_face_center: Target face center (x, y) from training data
        output_image_size: Output image size (defaults to DEFAULT_IMG_SIZE)
        background_color: Background color for padding (defaults to
            DEFAULT_BACKGROUND_COLOR)

    Returns:
        Tuple of (cropped_image, crop_parameters); crop_parameters holds
        'resize_ratio', 'x_offset_left' and 'y_offset_top' so the transform
        can be inverted by callers.

    Raises:
        ValueError: If no face is detected in the image
    """
    image_height, image_width, _ = input_image_array.shape

    # Flatten any alpha channel onto a solid background so the detector
    # always sees a 3-channel RGB array.
    if input_image_array.shape[2] == 4:
        rgba_pil_image = Image.fromarray(input_image_array)
        background_image = Image.new("RGB", rgba_pil_image.size, background_color)
        rgb_composite_image = Image.alpha_composite(background_image.convert("RGBA"), rgba_pil_image).convert("RGB")
        processed_image_array = np.array(rgb_composite_image)
    else:
        processed_image_array = input_image_array[:, :, :3]

    # Detect faces and keep the largest confident one.
    detected_bounding_boxes, confidence_scores = face_detector.detect(processed_image_array)
    selected_face_bbox = select_face(detected_bounding_boxes, confidence_scores)
    if selected_face_bbox is None:
        raise ValueError("No face detected in the image")

    # Face size = mean of bbox width and height; centre = bbox midpoint.
    detected_face_size = 0.5 * (selected_face_bbox[2] - selected_face_bbox[0] + selected_face_bbox[3] - selected_face_bbox[1])
    detected_face_center = (
        0.5 * (selected_face_bbox[0] + selected_face_bbox[2]),
        0.5 * (selected_face_bbox[1] + selected_face_bbox[3])
    )

    # Rescale the whole frame so the detected face matches the target size.
    scale_ratio = target_face_size / detected_face_size
    scaled_width, scaled_height = int(image_width * scale_ratio), int(image_height * scale_ratio)
    scaled_pil_image = Image.fromarray(processed_image_array).resize((scaled_width, scaled_height))
    scaled_face_center = (
        int(detected_face_center[0] * scale_ratio),
        int(detected_face_center[1] * scale_ratio)
    )

    # Blank canvas that the shifted crop is pasted onto.
    output_image = Image.new("RGB", (output_image_size, output_image_size), color=background_color)

    # Translation that moves the scaled face centre onto the target centre.
    horizontal_offset = target_face_center[0] - scaled_face_center[0]
    vertical_offset = target_face_center[1] - scaled_face_center[1]

    # Clip the source crop so it stays inside both the scaled image and the
    # output canvas (negative offsets mean the source extends past the edge).
    crop_left_boundary = int(max(0, -horizontal_offset))
    crop_top_boundary = int(max(0, -vertical_offset))
    crop_right_boundary = int(min(scaled_width, output_image_size - horizontal_offset))
    crop_bottom_boundary = int(min(scaled_height, output_image_size - vertical_offset))

    cropped_face_image = scaled_pil_image.crop((crop_left_boundary, crop_top_boundary, crop_right_boundary, crop_bottom_boundary))
    paste_coordinates = (int(max(0, horizontal_offset)), int(max(0, vertical_offset)))
    output_image.paste(cropped_face_image, paste_coordinates)

    crop_parameters = {
        'resize_ratio': scale_ratio,
        'x_offset_left': horizontal_offset,
        'y_offset_top': vertical_offset,
    }

    return output_image, crop_parameters
|
|
|
|
|
def prepare_foreground_with_rembg(input_image_array: np.ndarray) -> np.ndarray:
    """
    Prepare foreground image using rembg for background removal.

    Args:
        input_image_array: Input image as numpy array (H, W, C)

    Returns:
        RGBA image as numpy array with background removed
    """
    # Run the matting model and convert straight back to an ndarray.
    matted_array = np.array(remove(Image.fromarray(input_image_array)))

    channel_count = matted_array.shape[2]
    if channel_count == 3:
        # rembg handed back plain RGB; attach a fully opaque alpha channel.
        height, width = matted_array.shape[:2]
        with_alpha = np.zeros((height, width, 4), dtype=np.uint8)
        with_alpha[:, :, :3] = matted_array
        with_alpha[:, :, 3] = np.full((height, width), 255, dtype=np.uint8)
        return with_alpha

    # Already RGBA (or an unexpected channel count) — pass through unchanged.
    return matted_array
|
|
|
|
|
def preprocess_image(
    original_image_array: np.ndarray,
    target_image_size: int = DEFAULT_IMG_SIZE,
    background_color: Tuple[int, int, int] = DEFAULT_BACKGROUND_COLOR
) -> Image.Image:
    """
    Preprocess image with background removal and face cropping.

    Args:
        original_image_array: Input image as numpy array
        target_image_size: Target image size
        background_color: Background color for compositing

    Returns:
        Processed PIL Image
    """
    foreground_array = prepare_foreground_with_rembg(original_image_array)

    # Flatten the matted RGBA foreground onto a solid backdrop before cropping.
    if foreground_array.shape[2] == 4:
        foreground_pil = Image.fromarray(foreground_array)
        backdrop_rgba = Image.new("RGB", foreground_pil.size, background_color).convert("RGBA")
        flattened = Image.alpha_composite(backdrop_rgba, foreground_pil).convert("RGB")
        foreground_array = np.array(flattened)

    # Align the face to the training-set scale/centre; drop the crop params.
    face_image, _ = crop_face(
        foreground_array,
        FACE_DETECTOR,
        FACE_SIZE,
        FACE_CENTER,
        target_image_size,
        background_color
    )
    return face_image
|
|
|
|
|
def preprocess_image_without_cropping(
    original_image_array: np.ndarray,
    target_image_size: int = DEFAULT_IMG_SIZE,
    background_color: Tuple[int, int, int] = DEFAULT_BACKGROUND_COLOR
) -> Image.Image:
    """
    Preprocess image with background removal, without face cropping.

    The whole frame is resized to a square of ``target_image_size`` (aspect
    ratio is NOT preserved) and composited onto ``background_color``.

    Args:
        original_image_array: Input image as numpy array
        target_image_size: Target image size
        background_color: Background color for compositing

    Returns:
        Processed PIL Image (RGB)
    """
    processed_image_array = prepare_foreground_with_rembg(original_image_array)

    # Image.alpha_composite raises ValueError unless BOTH images are RGBA.
    # prepare_foreground_with_rembg normally yields RGBA but can pass through
    # other channel counts, so force the mode explicitly here.
    resized_image = (
        Image.fromarray(processed_image_array)
        .resize((target_image_size, target_image_size))
        .convert("RGBA")
    )
    background_image = Image.new("RGBA", (target_image_size, target_image_size), background_color)
    composite_image = Image.alpha_composite(background_image, resized_image).convert("RGB")
    return composite_image
|
|
|