Spaces:
Running
Running
| # built-in dependencies | |
| from typing import Any, Dict, List, Union, Optional, Sequence, IO, cast | |
| from collections import defaultdict | |
| # 3rd party dependencies | |
| import numpy as np | |
| from numpy.typing import NDArray | |
| from lightphe import LightPHE | |
| # project dependencies | |
| from deepface.commons import image_utils | |
| from deepface.modules import modeling, detection, preprocessing | |
| from deepface.models.FacialRecognition import FacialRecognition | |
| from deepface.modules.normalization import normalize_embedding_l2, normalize_embedding_minmax | |
| from deepface.modules.encryption import encrypt_embeddings | |
| from deepface.modules.exceptions import SpoofDetected | |
| from deepface.commons.logger import Logger | |
| logger = Logger() | |
| # pylint: disable=too-many-positional-arguments | |
def represent(
    img_path: Union[str, IO[bytes], NDArray[Any], Sequence[Union[str, NDArray[Any], IO[bytes]]]],
    model_name: str = "VGG-Face",
    enforce_detection: bool = True,
    detector_backend: str = "opencv",
    align: bool = True,
    expand_percentage: int = 0,
    normalization: str = "base",
    anti_spoofing: bool = False,
    max_faces: Optional[int] = None,
    l2_normalize: bool = False,
    minmax_normalize: bool = False,
    return_face: bool = False,
    cryptosystem: Optional[LightPHE] = None,
) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
    """
    Represent facial images as multi-dimensional vector embeddings.

    Args:
        img_path (str, IO[bytes], np.ndarray, or a sequence of these):
            The exact path to the image, a numpy array in BGR format,
            a base64 encoded image, a binary file-like object, or a list / tuple of these.
            A 4 dimensional numpy array is treated as a batch and split along its first axis.
            If the source image contains multiple faces,
            the result will include information for each detected face.

        model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
            OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet

        enforce_detection (boolean): If no face is detected in an image, raise an exception.
            Default is True. Set to False to avoid the exception for low-resolution images.

        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8n', 'yolov8m', 'yolov8l', 'yolov11n',
            'yolov11s', 'yolov11m', 'yolov11l', 'yolov12n', 'yolov12s', 'yolov12m', 'yolov12l',
            'centerface' or 'skip'.

        align (boolean): Perform alignment based on the eye positions.

        expand_percentage (int): expand detected facial area with a percentage (default is 0).

        normalization (string): Normalize the input image before feeding it to the model.
            Default is base. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace

        anti_spoofing (boolean): Flag to enable anti spoofing (default is False).

        max_faces (int): Set a limit on the number of faces to be processed (default is None).
            When more faces are found, the ones with the largest facial area are kept.

        l2_normalize (bool): Flag to enable L2 normalization (unit vector normalization)
            of the output embeddings

        minmax_normalize (bool): Flag to enable min-max normalization of the output embeddings
            to the range [0, 1]. When both flags are set, min-max is applied before L2.

        return_face (bool): If True, the detected face images will also be returned along
            with embeddings. Default is False.

        cryptosystem (LightPHE): An instance of a partially homomorphic encryption system
            to encrypt the output embeddings. If provided, the embeddings will be encrypted
            using the specified cryptosystem. Then, you will be able to perform homomorphic
            operations on the encrypted embeddings without decrypting them first.
            Check out the repo to find out more: https://github.com/serengil/lightphe

    Returns:
        results (List[Dict[str, Any]] or List[List[Dict[str, Any]]]): A list of dictionaries.
            Result type becomes List of List of Dict if batch input passed
            (one inner list per input image, in input order). A batch holding exactly one
            image is returned as a plain List of Dict. If nothing is left to embed
            (e.g. an empty batch), the lists are empty.
            Each containing the following fields:

        - embedding (List[float]): Multidimensional vector representing facial features.
            The number of dimensions varies based on the reference model
            (e.g., FaceNet returns 128 dimensions, VGG-Face returns 4096 dimensions).

        - facial_area (dict): Detected facial area by face detection in dictionary format.
            Contains 'x' and 'y' as the left-corner point, and 'w' and 'h'
            as the width and height. If `detector_backend` is set to 'skip', it represents
            the full image area and is nonsensical.

        - face_confidence (float): Confidence score of face detection. If `detector_backend` is set
            to 'skip', the confidence will be 0 and is nonsensical.

        - face (np.ndarray): The pre-processed (resized and normalized) face that was fed to
            the model. This field is included only if `return_face` is True.

        - encrypted_embedding (List[Any]): Encrypted multidimensional vector representing
            facial features. This field is included only if a `cryptosystem` is provided.

    Raises:
        ValueError: if `detector_backend` is 'skip' and a loaded image is not 3 dimensional.
        SpoofDetected: if `anti_spoofing` is True and a face is flagged as not real.
    """
    model: FacialRecognition = modeling.build_model(
        task="facial_recognition", model_name=model_name
    )

    # The expected input size belongs to the model, so read it once instead of per image.
    target_size = model.input_shape

    # Normalize the input into a flat list of single images.
    # Lists, tuples and 4D numpy arrays are batches; everything else is one image.
    if isinstance(img_path, (list, tuple)):
        images = list(img_path)
    elif isinstance(img_path, np.ndarray) and img_path.ndim == 4:
        images = list(img_path)
    else:
        images = [img_path]

    # Parallel lists describing every face that goes into the single forward pass;
    # batch_indexes remembers which input image each face came from.
    batch_images, batch_regions, batch_confidences, batch_indexes = [], [], [], []

    for idx, single_img_path in enumerate(images):
        if detector_backend != "skip":
            # Images are returned in RGB format.
            img_objs: List[Dict[str, Any]] = cast(
                List[Dict[str, Any]],
                detection.extract_faces(
                    img_path=single_img_path,
                    detector_backend=detector_backend,
                    grayscale=False,
                    enforce_detection=enforce_detection,
                    align=align,
                    expand_percentage=expand_percentage,
                    anti_spoofing=anti_spoofing,
                    max_faces=max_faces,
                ),
            )
        else:  # skip
            # Detection is skipped (e.g. the caller already pre-processed the image),
            # so the whole image is treated as the face.
            # Try load. If load error, will raise exception internal
            img, _ = image_utils.load_image(single_img_path)

            if len(img.shape) != 3:
                raise ValueError(f"Input img must be 3 dimensional but it is {img.shape}")

            # Convert to RGB format to keep compatibility with `extract_faces`.
            img = img[:, :, ::-1]

            # Image arrays are indexed (rows, cols, channels): height is shape[0]
            # and width is shape[1].
            height, width = img.shape[0], img.shape[1]

            # make dummy region and confidence to keep compatibility with `extract_faces`
            img_objs = [
                {
                    "face": img,
                    "facial_area": {"x": 0, "y": 0, "w": width, "h": height},
                    "confidence": 0,
                }
            ]

        if max_faces is not None and max_faces < len(img_objs):
            # sort as largest facial areas come first
            img_objs = sorted(
                img_objs,
                key=lambda img_obj: img_obj["facial_area"]["w"] * img_obj["facial_area"]["h"],
                reverse=True,
            )
            # discard rest of the items
            img_objs = img_objs[0:max_faces]

        for img_obj in img_objs:
            if anti_spoofing is True and img_obj.get("is_real", True) is False:
                raise SpoofDetected("Spoof detected in the given image.")

            # rgb to bgr
            img = img_obj["face"][:, :, ::-1]

            # resize to expected shape of ml model
            img = preprocessing.resize_image(
                img=img,
                # thanks to DeepId (!)
                target_size=(target_size[1], target_size[0]),
            )

            # custom normalization
            img = preprocessing.normalize_input(img=img, normalization=normalization)

            batch_images.append(img)
            batch_regions.append(img_obj["facial_area"])
            batch_confidences.append(img_obj["confidence"])
            batch_indexes.append(idx)

    if not batch_images:
        # Nothing to embed (empty input batch, or every face was discarded).
        # np.concatenate would fail on an empty list, so answer with empty results
        # shaped the same way as the regular return value.
        empty_results: List[List[Dict[str, Any]]] = [[] for _ in images]
        return empty_results[0] if len(images) == 1 else empty_results

    # Convert list of images to a numpy array for batch processing.
    # NOTE(review): concatenating on axis 0 assumes every pre-processed image already
    # carries a leading batch axis - confirm against `preprocessing.resize_image`.
    batch_images_np = np.concatenate(batch_images, axis=0)

    # Forward pass through the model for the entire batch
    embeddings = model.forward(batch_images_np)

    if minmax_normalize:
        embeddings = normalize_embedding_minmax(model_name, embeddings)

    if l2_normalize:
        embeddings = normalize_embedding_l2(embeddings)

    encrypted_embeddings = encrypt_embeddings(embeddings, cryptosystem)

    # With exactly one face in the batch the embedding is used as-is instead of being
    # indexed (presumably `model.forward` returns a single flat vector then - TODO confirm).
    is_single_face = len(batch_images) == 1

    # Group the per-face results by the index of the image they came from.
    resp_objs_dict = defaultdict(list)
    for idy, batch_index in enumerate(batch_indexes):
        resp_obj = {
            "embedding": embeddings if is_single_face else embeddings[idy],
            "facial_area": batch_regions[idy],
            "face_confidence": batch_confidences[idy],
        }

        if return_face:
            resp_obj["face"] = batch_images_np[idy]

        if cryptosystem is not None and encrypted_embeddings is not None:
            resp_obj["encrypted_embedding"] = (
                encrypted_embeddings if is_single_face else encrypted_embeddings[idy]
            )

        resp_objs_dict[batch_index].append(resp_obj)

    # Images that produced no face still get an (empty) entry, keeping input order.
    results: List[List[Dict[str, Any]]] = [resp_objs_dict[idx] for idx in range(len(images))]
    return results[0] if len(images) == 1 else results