Spaces:

daomanhduc
/

datn-face-ai

Running

File size: 9,607 Bytes

b5d3a91

# built-in dependencies
from typing import Any, Dict, List, Union, Optional, Sequence, IO, cast
from collections import defaultdict

# 3rd party dependencies
import numpy as np
from numpy.typing import NDArray
from lightphe import LightPHE

# project dependencies
from deepface.commons import image_utils
from deepface.modules import modeling, detection, preprocessing
from deepface.models.FacialRecognition import FacialRecognition
from deepface.modules.normalization import normalize_embedding_l2, normalize_embedding_minmax
from deepface.modules.encryption import encrypt_embeddings
from deepface.modules.exceptions import SpoofDetected
from deepface.commons.logger import Logger

logger = Logger()


# pylint: disable=too-many-positional-arguments
def represent(
    img_path: Union[str, IO[bytes], NDArray[Any], Sequence[Union[str, NDArray[Any], IO[bytes]]]],
    model_name: str = "VGG-Face",
    enforce_detection: bool = True,
    detector_backend: str = "opencv",
    align: bool = True,
    expand_percentage: int = 0,
    normalization: str = "base",
    anti_spoofing: bool = False,
    max_faces: Optional[int] = None,
    l2_normalize: bool = False,
    minmax_normalize: bool = False,
    return_face: bool = False,
    cryptosystem: Optional[LightPHE] = None,
) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
    """
    Represent facial images as multi-dimensional vector embeddings.

    Args:
        img_path (str, np.ndarray, or Sequence[Union[str, np.ndarray]]):
            The exact path to the image, a numpy array in BGR format,
            a base64 encoded image, or a sequence of these.
            If the source image contains multiple faces,
            the result will include information for each detected face.

        model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
            OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet

        enforce_detection (boolean): If no face is detected in an image, raise an exception.
            Default is True. Set to False to avoid the exception for low-resolution images.

        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8n', 'yolov8m', 'yolov8l', 'yolov11n',
            'yolov11s', 'yolov11m', 'yolov11l', 'yolov12n', 'yolov12s', 'yolov12m', 'yolov12l'
            'centerface' or 'skip'.

        align (boolean): Perform alignment based on the eye positions.

        expand_percentage (int): expand detected facial area with a percentage (default is 0).

        normalization (string): Normalize the input image before feeding it to the model.
            Default is base. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace

        anti_spoofing (boolean): Flag to enable anti spoofing (default is False).

        max_faces (int): Set a limit on the number of faces to be processed (default is None).

        l2_normalize (bool): Flag to enable L2 normalization (unit vector normalization)
            of the output embeddings

        minmax_normalize (bool): Flag to enable min-max normalization of the output embeddings
            to the range [0, 1].

        return_face (bool): If True, the detected face images will also be returned along
            with embeddings. Default is False.

        cryptosystem (LightPHE): An instance of a partially homomorphic encryption system
            to encrypt the output embeddings. If provided, the embeddings will be encrypted
            using the specified cryptosystem. Then, you will be able to perform homomorphic
            operations on the encrypted embeddings without decrypting them first.
            Check out the repo to find out more: https://github.com/serengil/lightphe

    Returns:
        results (List[Dict[str, Any]] or List[Dict[str, Any]]): A list of dictionaries.
            Result type becomes List of List of Dict if batch input passed.
            Each containing the following fields:

        - embedding (List[float]): Multidimensional vector representing facial features.
            The number of dimensions varies based on the reference model
            (e.g., FaceNet returns 128 dimensions, VGG-Face returns 4096 dimensions).
        - facial_area (dict): Detected facial area by face detection in dictionary format.
            Contains 'x' and 'y' as the left-corner point, and 'w' and 'h'
            as the width and height. If `detector_backend` is set to 'skip', it represents
            the full image area and is nonsensical.
        - face_confidence (float): Confidence score of face detection. If `detector_backend` is set
            to 'skip', the confidence will be 0 and is nonsensical.
        - encrypted_embedding (List[Any]): Encrypted multidimensional vector representing
            facial features. This field is included only if a `cryptosystem` is provided.
    """
    resp_objs = []

    model: FacialRecognition = modeling.build_model(
        task="facial_recognition", model_name=model_name
    )

    # Handle list of image paths or 4D numpy array
    if isinstance(img_path, list):
        images = img_path
    elif isinstance(img_path, np.ndarray) and img_path.ndim == 4:
        images = [img_path[i] for i in range(img_path.shape[0])]
    else:
        images = [img_path]

    batch_images, batch_regions, batch_confidences, batch_indexes = [], [], [], []

    for idx, single_img_path in enumerate(images):
        # we have run pre-process in verification. so, skip if it is coming from verify.
        target_size = model.input_shape
        if detector_backend != "skip":
            # Images are returned in RGB format.
            img_objs: List[Dict[str, Any]] = cast(
                List[Dict[str, Any]],
                detection.extract_faces(
                    img_path=single_img_path,
                    detector_backend=detector_backend,
                    grayscale=False,
                    enforce_detection=enforce_detection,
                    align=align,
                    expand_percentage=expand_percentage,
                    anti_spoofing=anti_spoofing,
                    max_faces=max_faces,
                ),
            )
        else:  # skip
            # Try load. If load error, will raise exception internal
            img, _ = image_utils.load_image(single_img_path)

            if len(img.shape) != 3:
                raise ValueError(f"Input img must be 3 dimensional but it is {img.shape}")

            # Convert to RGB format to keep compatability with `extract_faces`.
            img = img[:, :, ::-1]

            # make dummy region and confidence to keep compatibility with `extract_faces`
            img_objs = [
                {
                    "face": img,
                    "facial_area": {"x": 0, "y": 0, "w": img.shape[0], "h": img.shape[1]},
                    "confidence": 0,
                }
            ]
        # ---------------------------------

        if max_faces is not None and max_faces < len(img_objs):
            # sort as largest facial areas come first
            img_objs = sorted(
                img_objs,
                key=lambda img_obj: img_obj["facial_area"]["w"] * img_obj["facial_area"]["h"],
                reverse=True,
            )
            # discard rest of the items
            img_objs = img_objs[0:max_faces]

        for img_obj in img_objs:
            if anti_spoofing is True and img_obj.get("is_real", True) is False:
                raise SpoofDetected("Spoof detected in the given image.")

            img = img_obj["face"]

            # rgb to bgr
            img = img[:, :, ::-1]

            region = img_obj["facial_area"]
            confidence = img_obj["confidence"]

            # resize to expected shape of ml model
            img = preprocessing.resize_image(
                img=img,
                # thanks to DeepId (!)
                target_size=(target_size[1], target_size[0]),
            )

            # custom normalization
            img = preprocessing.normalize_input(img=img, normalization=normalization)

            batch_images.append(img)
            batch_regions.append(region)
            batch_confidences.append(confidence)
            batch_indexes.append(idx)

    # Convert list of images to a numpy array for batch processing
    batch_images_np = np.concatenate(batch_images, axis=0)

    # Forward pass through the model for the entire batch
    embeddings = model.forward(batch_images_np)

    if minmax_normalize:
        embeddings = normalize_embedding_minmax(model_name, embeddings)

    if l2_normalize:
        embeddings = normalize_embedding_l2(embeddings)

    encrypted_embeddings = encrypt_embeddings(embeddings, cryptosystem)

    resp_objs_dict = defaultdict(list)
    for idy, batch_index in enumerate(batch_indexes):
        resp_obj = {
            "embedding": embeddings if len(batch_images) == 1 else embeddings[idy],
            "facial_area": batch_regions[idy],
            "face_confidence": batch_confidences[idy],
        }

        if return_face:
            resp_obj["face"] = batch_images_np[idy]
        if cryptosystem is not None and encrypted_embeddings is not None:
            resp_obj["encrypted_embedding"] = (
                encrypted_embeddings if len(batch_images) == 1 else encrypted_embeddings[idy]
            )
        resp_objs_dict[batch_index].append(resp_obj)

    resp_objs = [resp_objs_dict[idx] for idx in range(len(images))]

    return resp_objs[0] if len(images) == 1 else resp_objs