Spaces:
Running
Running
File size: 22,666 Bytes
b5d3a91 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 | # built-in dependencies
from typing import Any, Dict, IO, List, Tuple, Union, Optional, cast
from heapq import nlargest
# 3rd party dependencies
import numpy as np
from numpy.typing import NDArray
import cv2
# project dependencies
from deepface.modules import modeling
from deepface.models.Detector import Detector, DetectedFace, FacialAreaRegion
from deepface.commons import image_utils
from deepface.models.face_detection import OpenCv
from deepface.modules.exceptions import FaceNotDetected
from deepface.modules.exceptions import UnimplementedError, ImgNotFound
from deepface.commons.logger import Logger
logger = Logger()
# pylint: disable=no-else-raise, too-many-positional-arguments
def is_valid_landmark(
    coord: Optional[Union[Tuple[int, int], Tuple[float, float], List[Union[float, int]]]],
    width: int,
    height: int,
) -> bool:
    """
    Tell whether a landmark point lies inside an image of the given size.
    Args:
        coord (tuple or list or None): candidate (x, y) point.
        width (int): image width; x has to satisfy 0 <= x < width.
        height (int): image height; y has to satisfy 0 <= y < height.
    Returns:
        bool: False when coord is None, is not a tuple / list, or does not hold
            exactly two items; otherwise whether both components are in bounds.
    """
    # None is neither a tuple nor a list, so this single guard rejects it as well
    if not isinstance(coord, (tuple, list)) or len(coord) != 2:
        return False
    pos_x, pos_y = coord
    return 0 <= pos_x < width and 0 <= pos_y < height
def extract_faces(
    img_path: Union[str, NDArray[Any], IO[bytes], List[str], List[NDArray[Any]], List[IO[bytes]]],
    detector_backend: str = "opencv",
    enforce_detection: bool = True,
    align: bool = True,
    expand_percentage: int = 0,
    grayscale: bool = False,
    color_face: str = "rgb",
    normalize_face: bool = True,
    anti_spoofing: bool = False,
    max_faces: Optional[int] = None,
) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
    """
    Extract faces from a given image (or from each image of a batch)
    Args:
        img_path (str or list of str or np.ndarray or IO[bytes]): Path to the first image.
            Accepts exact image path as a string, list of string, numpy array (BGR), a file object
            that supports at least `.read` and is opened in binary mode, or base64 encoded images.
            A list, or a 4 dimensional numpy array, is handled as a batch: every item is
            processed separately with the same arguments.
        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8n', 'yolov8m', 'yolov8l', 'yolov11n',
            'yolov11s', 'yolov11m', 'yolov11l', 'yolov12n', 'yolov12s', 'yolov12m', 'yolov12l',
            'centerface' or 'skip' (default is opencv). With 'skip' no detector runs and
            the whole image is treated as a single face.
        enforce_detection (boolean): If no face is detected in an image, raise an exception.
            Default is True. Set to False to avoid the exception for low-resolution images;
            the whole image is then returned as a single face with confidence 0.
        align (bool): Flag to enable face alignment (default is True).
        expand_percentage (int): expand detected facial area with a percentage.
        grayscale (boolean): (Deprecated) Flag to convert the output face image to grayscale
            (default is False). When True it takes precedence over color_face.
        color_face (string): Color to return face image output. Options: 'rgb', 'bgr' or 'gray'
            (default is 'rgb').
        normalize_face (boolean): Flag to enable normalization (divide by 255) of the output
            face image (default is True).
        anti_spoofing (boolean): Flag to enable anti spoofing (default is False).
        max_faces (int or None): forwarded to detect_faces, which keeps only the largest
            facial areas when more than max_faces are found (default is None, no limit).
    Returns:
        results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary contains:
            - "face" (np.ndarray): The detected face as a NumPy array, in the color space
                selected by color_face / grayscale.
            - "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
                - keys 'x', 'y', 'w', 'h' with int values
                - keys 'left_eye', 'right_eye' with a tuple of 2 ints as values, or None
                    when the landmark is missing or falls outside of the image.
                    left eye and right eye are eyes on the left and right respectively with respect
                    to the person itself instead of observer.
                - keys 'nose', 'mouth_left', 'mouth_right' only when the detector provided them.
            - "confidence" (float): The confidence score associated with the detected face.
            - "is_real" (boolean): antispoofing analyze result. this key is just available in the
                result only if anti_spoofing is set to True in input arguments.
            - "antispoof_score" (float): score of antispoofing analyze result. this key is
                just available in the result only if anti_spoofing is set to True in input arguments.
        For batch input, a list holding one such list per input image is returned.
    Raises:
        ValueError: if the image could not be loaded.
        FaceNotDetected: if no face was detected and enforce_detection is set.
        ImgNotFound: if every detected face was empty and enforce_detection is set.
        UnimplementedError: if color_face is not one of 'rgb', 'bgr' or 'gray'.
    """
    # batch input: process every image on its own and keep the results grouped per image
    if isinstance(img_path, list) or (isinstance(img_path, np.ndarray) and img_path.ndim == 4):
        # iterating a 4D array yields its 3D images one by one
        img_paths = list(img_path)
        return [
            cast(
                List[Dict[str, Any]],
                extract_faces(
                    img_path=single_img_path,
                    detector_backend=detector_backend,
                    enforce_detection=enforce_detection,
                    align=align,
                    expand_percentage=expand_percentage,
                    grayscale=grayscale,
                    color_face=color_face,
                    normalize_face=normalize_face,
                    anti_spoofing=anti_spoofing,
                    max_faces=max_faces,
                ),
            )
            for single_img_path in img_paths
        ]

    resp_objs: List[Dict[str, Any]] = []

    # img might be path, base64 or numpy array. Convert it to numpy whatever it is.
    img, img_name = image_utils.load_image(img_path)

    if img is None:
        raise ValueError(f"Exception while loading {img_name}")

    height, width, _ = img.shape

    # region covering the whole image, used when detection is skipped or finds nothing
    base_region = FacialAreaRegion(x=0, y=0, w=width, h=height, confidence=0)

    if detector_backend == "skip":
        face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]
    else:
        face_objs = detect_faces(
            detector_backend=detector_backend,
            img=img,
            align=align,
            expand_percentage=expand_percentage,
            max_faces=max_faces,
        )

    # in case of no face found
    if not face_objs:
        if enforce_detection:
            if img_name is not None:
                raise FaceNotDetected(
                    f"Face could not be detected in {img_name}. "
                    "Please confirm that the picture is a face photo "
                    "or consider to set enforce_detection param to False."
                )
            raise FaceNotDetected(
                "Face could not be detected. Please confirm that the picture is a face photo "
                "or consider to set enforce_detection param to False."
            )
        # detection is not enforced: fall back to the whole image
        face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]

    # the deprecation notice concerns the call, so emit it once instead of once per face
    if grayscale is True:
        logger.warn("Parameter grayscale is deprecated. Use color_face instead.")

    for face_obj in face_objs:
        current_img = face_obj.img
        current_region = face_obj.facial_area

        # detectors occasionally return empty crops; there is nothing to output for them
        if current_img.shape[0] == 0 or current_img.shape[1] == 0:
            continue

        if grayscale is True:
            current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)
        elif color_face == "rgb":
            # reverse the channel axis: BGR -> RGB
            current_img = current_img[:, :, ::-1]
        elif color_face == "gray":
            current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)
        elif color_face != "bgr":  # bgr needs no conversion, the image is already BGR
            raise UnimplementedError(
                f"The color_face can be rgb, bgr or gray, but it is {color_face}."
            )

        if normalize_face:
            current_img = current_img / 255  # normalize input in [0, 1]

        # cast to int for flask, and do final checks for borders
        x = max(0, int(current_region.x))
        y = max(0, int(current_region.y))
        w = min(width - x - 1, int(current_region.w))
        h = min(height - y - 1, int(current_region.h))

        # convert landmarks to raw python types to ensure compatibility with Flask,
        # and set the ones falling outside of the image to None
        landmarks: Dict[str, Any] = {}
        for key in ("left_eye", "right_eye", "nose", "mouth_left", "mouth_right"):
            value = getattr(current_region, key)
            if value is not None:
                value = (int(value[0]), int(value[1]))
            landmarks[key] = value if is_valid_landmark(value, width, height) else None

        facial_area: Dict[str, Any] = {
            "x": x,
            "y": y,
            "w": w,
            "h": h,
            "left_eye": landmarks["left_eye"],
            "right_eye": landmarks["right_eye"],
        }

        # optional nose, mouth_left and mouth_right fields are coming just for retinaface
        for key in ("nose", "mouth_left", "mouth_right"):
            if getattr(current_region, key) is not None:
                facial_area[key] = landmarks[key]

        resp_obj: Dict[str, Any] = {
            "face": current_img,
            "facial_area": facial_area,
            "confidence": round(float(current_region.confidence or 0), 2),
        }

        if anti_spoofing is True:
            antispoof_model = modeling.build_model(task="spoofing", model_name="Fasnet")
            # spoofing is analyzed on the original image with the sanitized region
            is_real, antispoof_score = antispoof_model.analyze(img=img, facial_area=(x, y, w, h))
            resp_obj["is_real"] = is_real
            resp_obj["antispoof_score"] = antispoof_score

        resp_objs.append(resp_obj)

    # every candidate was skipped as an empty crop
    if not resp_objs and enforce_detection:
        raise ImgNotFound(
            f"Exception while extracting faces from {img_name}. "
            "Consider to set enforce_detection arg to False."
        )

    return resp_objs
def detect_faces(
    detector_backend: str,
    img: NDArray[Any],
    align: bool = True,
    expand_percentage: int = 0,
    max_faces: Optional[int] = None,
) -> List[DetectedFace]:
    """
    Detect face(s) from a given image
    Args:
        detector_backend (str): detector name
        img (np.ndarray): pre-loaded image
        align (bool): enable or disable alignment after detection
        expand_percentage (int): expand detected facial area with a percentage (default is 0).
            Negative values are replaced by 0 with a warning.
        max_faces (int or None): when set and the detector finds more faces than this,
            only the max_faces largest ones (by w * h) are kept (default is None).
    Returns:
        results (List[DetectedFace]): A list of DetectedFace objects
            where each object contains:
            - img (np.ndarray): The detected face as a NumPy array.
            - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h,
                left_eye and right eye. left eye and right eye are eyes on the left and right
                with respect to the person instead of observer.
            - confidence (float): The confidence score associated with the detected face.
    """
    height, width, _ = img.shape

    face_detector: Detector = modeling.build_model(
        task="face_detector", model_name=detector_backend
    )

    # validate expand percentage score
    if expand_percentage < 0:
        logger.warn(
            f"Expand percentage cannot be negative but you set it to {expand_percentage}. "
            "Overwritten it to 0."
        )
        expand_percentage = 0

    # If faces are close to the upper boundary, alignment move them outside
    # Add a black border around an image to avoid this.
    height_border = int(0.5 * height)
    width_border = int(0.5 * width)
    if align is True:
        img = cv2.copyMakeBorder(
            img,
            height_border,
            height_border,
            width_border,
            width_border,
            cv2.BORDER_CONSTANT,
            value=[0, 0, 0],  # Color of the border (black)
        )

    # find facial areas of given image
    facial_areas = face_detector.detect_faces(img)

    # keep just the biggest faces if a limit is requested
    if max_faces is not None and max_faces < len(facial_areas):
        facial_areas = nlargest(
            max_faces, facial_areas, key=lambda facial_area: facial_area.w * facial_area.h
        )

    return [
        extract_face(
            facial_area=facial_area,
            img=img,
            align=align,
            expand_percentage=expand_percentage,
            width_border=width_border,
            height_border=height_border,
            detector_backend=detector_backend,
        )
        for facial_area in facial_areas
    ]
def extract_face(
    facial_area: FacialAreaRegion,
    img: NDArray[Any],
    align: bool,
    expand_percentage: int,
    width_border: int,
    height_border: int,
    detector_backend: str,
) -> DetectedFace:
    """
    Crop a single detected face from the image, optionally expanded and aligned
    Args:
        facial_area (FacialAreaRegion): region and landmarks reported by the detector
        img (np.ndarray): image the facial area refers to
        align (bool): when True, the face is rotated with respect to the eyes and
            width_border / height_border are subtracted from x, y and the landmarks
        expand_percentage (int): grow the facial area by this percentage when positive
        width_border (int): horizontal border size removed from coordinates when aligning
        height_border (int): vertical border size removed from coordinates when aligning
        detector_backend (str): name of the detector that produced facial_area
    Returns:
        detected_face (DetectedFace): cropped face image, its facial area region
            and the confidence (0 when the detector gave none)
    """
    x, y, w, h = facial_area.x, facial_area.y, facial_area.w, facial_area.h
    left_eye, right_eye = facial_area.left_eye, facial_area.right_eye
    nose = facial_area.nose
    mouth_left, mouth_right = facial_area.mouth_left, facial_area.mouth_right
    confidence = facial_area.confidence

    if expand_percentage > 0:
        # Grow the region around its center by the requested percentage
        # while keeping it within img.shape limits
        grown_w = w + int(w * expand_percentage / 100)
        grown_h = h + int(h * expand_percentage / 100)

        x = max(0, x - int((grown_w - w) / 2))
        y = max(0, y - int((grown_h - h) / 2))
        w = min(img.shape[1] - x, grown_w)
        h = min(img.shape[0] - y, grown_h)

    # plain (unaligned) crop of the face
    detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]

    # some detectors (e.g. ssd, yolo) give no eyes; ask opencv for the missing ones
    eye_missing = left_eye is None or right_eye is None
    if eye_missing and detector_backend != "opencv":
        default_detector: OpenCv.OpenCvClient = modeling.build_model(
            task="face_detector", model_name="opencv"
        )
        found_left, found_right = default_detector.find_eyes(detected_face)

        # found eyes are relative to the crop, so shift them by the crop origin
        if left_eye is None and found_left is not None:
            left_eye = (int(found_left[0]) + x, int(found_left[1]) + y)
            logger.debug(
                f"left eye wasn't detected by {detector_backend}, overwritten by cv2 - {left_eye}"
            )
        if right_eye is None and found_right is not None:
            right_eye = (int(found_right[0]) + x, int(found_right[1]) + y)
            logger.debug(
                f"right eye wasn't detected by {detector_backend}, overwritten by cv2 - {right_eye}"
            )

    if align is True:
        # rotating the whole image is costly, so only the facial area
        # with a margin around it is aligned
        sub_img, relative_x, relative_y = extract_sub_image(img=img, facial_area=(x, y, w, h))

        aligned_sub_img, angle = align_img_wrt_eyes(
            img=sub_img, left_eye=left_eye, right_eye=right_eye
        )

        # locate the facial area inside the rotated sub image
        projected_box = project_facial_area(
            facial_area=(relative_x, relative_y, relative_x + w, relative_y + h),
            angle=angle,
            size=(sub_img.shape[0], sub_img.shape[1]),
        )
        box_x1, box_y1, box_x2, box_y2 = projected_box
        detected_face = aligned_sub_img[int(box_y1) : int(box_y2), int(box_x1) : int(box_x2)]

        # the temporary images are not needed anymore
        del aligned_sub_img, sub_img

        def _strip_border(point: Any) -> Any:
            # move a landmark back to the coordinates of the image without border
            if point is None:
                return None
            return (point[0] - width_border, point[1] - height_border)

        # restore x, y and landmarks to their values before the border was added;
        # w and h are not affected
        x = x - width_border
        y = y - height_border
        left_eye = _strip_border(left_eye)
        right_eye = _strip_border(right_eye)
        nose = _strip_border(nose)
        mouth_left = _strip_border(mouth_left)
        mouth_right = _strip_border(mouth_right)

    region = FacialAreaRegion(
        x=x,
        y=y,
        h=h,
        w=w,
        confidence=confidence,
        left_eye=left_eye,
        right_eye=right_eye,
        nose=nose,
        mouth_left=mouth_left,
        mouth_right=mouth_right,
    )
    return DetectedFace(img=detected_face, facial_area=region, confidence=confidence or 0)
def extract_sub_image(
    img: NDArray[Any], facial_area: Tuple[int, int, int, int]
) -> Tuple[NDArray[Any], int, int]:
    """
    Cut the facial area out of the image together with a margin of half its
    width / height on every side, so a later rotation keeps the face inside.
    Parts of the margin that fall outside of the image are filled with black pixels.
    Args:
        - img (np.ndarray): pre-loaded image with detected face
        - facial_area (tuple of int): Representing the (x, y, w, h) of the facial area.
    Returns:
        - extracted_face (np.ndarray): facial image including the margin
        - relative_x (int): x of the facial area inside the returned image
        - relative_y (int): y of the facial area inside the returned image
    """
    x, y, w, h = facial_area
    relative_x, relative_y = int(0.5 * w), int(0.5 * h)
    img_h, img_w = img.shape[0], img.shape[1]

    # corners of the facial area once the margin is added
    left, top = x - relative_x, y - relative_y
    right, bottom = x + w + relative_x, y + h + relative_y

    # common case: the enlarged window lies completely inside the image
    fits_inside = left >= 0 and top >= 0 and right <= img_w and bottom <= img_h
    if fits_inside:
        return img[top:bottom, left:right], relative_x, relative_y

    # otherwise take the part of the window that exists in the image ...
    visible = img[max(0, top) : min(img_h, bottom), max(0, left) : min(img_w, right)]

    # ... and paste it onto a black canvas having the full window size
    canvas = np.zeros((h + 2 * relative_y, w + 2 * relative_x, img.shape[2]), dtype=img.dtype)
    offset_x = max(0, relative_x - x)
    offset_y = max(0, relative_y - y)
    canvas[
        offset_y : offset_y + visible.shape[0], offset_x : offset_x + visible.shape[1]
    ] = visible

    return canvas, relative_x, relative_y
def align_img_wrt_eyes(
    img: NDArray[Any],
    left_eye: Optional[Union[List[float], List[int], Tuple[float, float], Tuple[int, int]]],
    right_eye: Optional[Union[List[float], List[int], Tuple[float, float], Tuple[int, int]]],
) -> Tuple[NDArray[Any], float]:
    """
    Rotate a given image so that the line between the eyes becomes horizontal
    Args:
        img (np.ndarray): pre-loaded image with detected face
        left_eye (list or tuple): coordinates of left eye with respect to the person itself
        right_eye(list or tuple): coordinates of right eye with respect to the person itself
    Returns:
        img (np.ndarray): aligned facial image; the input itself if an eye is missing
            or the image has a zero-sized dimension
        angle (float): applied rotation angle in degrees, 0 when nothing was rotated
    """
    # nothing to do without both eyes, or for images with nil dimensions
    # which unexpectedly come from detection sometimes
    if left_eye is None or right_eye is None or img.shape[0] == 0 or img.shape[1] == 0:
        return img, 0

    # angle of the vector pointing from the right eye to the left eye
    delta_y = left_eye[1] - right_eye[1]
    delta_x = left_eye[0] - right_eye[0]
    angle = float(np.degrees(np.arctan2(delta_y, delta_x)))

    rows, cols = img.shape[:2]
    # rotate around the image center, keeping the size and filling gaps with black
    rotation = cv2.getRotationMatrix2D((cols // 2, rows // 2), angle, 1.0)
    rotated = cv2.warpAffine(
        img,
        rotation,
        (cols, rows),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0, 0, 0),
    )
    return rotated, angle
def project_facial_area(
    facial_area: Tuple[int, int, int, int], angle: float, size: Tuple[int, int]
) -> Tuple[int, int, int, int]:
    """
    Find where a facial area ends up after the image was rotated around its
    center with respect to the eyes. The box keeps its width and height; only
    its center is rotated, and the result is clipped to the image.
    Inspired from the work of @UmutDeniz26 - github.com/serengil/retinaface/pull/80
    Args:
        facial_area (tuple of int): Representing the (x1, y1, x2, y2) of the facial area.
            x2 is equal to x1 + w1, and y2 is equal to y1 + h1
        angle (float): Angle of rotation in degrees. Its sign determines the direction of rotation.
            Its magnitude is reduced modulo 360 before use.
        size (tuple of int): Size of the image, unpacked as (height, width).
    Returns:
        rotated_coordinates (tuple of int): Representing the new coordinates
            (x1, y1, x2, y2) of the rotated facial area. The input tuple is returned
            untouched when the normalized angle is 0.
    """
    # Python's modulo of a negative number is not what we want here,
    # so the sign is remembered separately and the magnitude is normalized.
    direction = -1 if angle < 0 else 1
    magnitude = abs(angle) % 360
    if magnitude == 0:
        return facial_area

    radians = magnitude * np.pi / 180
    cos_a, sin_a = np.cos(radians), np.sin(radians)

    height, width = size
    left, top, right, bottom = facial_area
    half_w = (right - left) / 2
    half_h = (bottom - top) / 2

    # center of the facial area expressed relative to the image center
    x = (left + right) / 2 - width / 2
    y = (top + bottom) / 2 - height / 2

    # rotate that center, then move it back to image coordinates
    center_x = x * cos_a + y * direction * sin_a
    center_y = -x * direction * sin_a + y * cos_a
    center_x = center_x + width / 2
    center_y = center_y + height / 2

    # rebuild the box around the rotated center and clip it to the image
    x1 = max(int(center_x - half_w), 0)
    y1 = max(int(center_y - half_h), 0)
    x2 = min(int(center_x + half_w), width)
    y2 = min(int(center_y + half_h), height)

    return (x1, y1, x2, y2)
|