Spaces:

daomanhduc
/

datn-face-ai

Running

datn-face-ai / deepface /modules /representation.py

DaoManhDuc2004

Deploy DATN face AI server

b5d3a91 12 days ago

9.61 kB

	# built-in dependencies
	from typing import Any, Dict, List, Union, Optional, Sequence, IO, cast
	from collections import defaultdict

	# 3rd party dependencies
	import numpy as np
	from numpy.typing import NDArray
	from lightphe import LightPHE

	# project dependencies
	from deepface.commons import image_utils
	from deepface.modules import modeling, detection, preprocessing
	from deepface.models.FacialRecognition import FacialRecognition
	from deepface.modules.normalization import normalize_embedding_l2, normalize_embedding_minmax
	from deepface.modules.encryption import encrypt_embeddings
	from deepface.modules.exceptions import SpoofDetected
	from deepface.commons.logger import Logger

	# Module-level logger instance; it is not referenced in the visible part of this file.
	logger = Logger()


	# pylint: disable=too-many-positional-arguments
	def represent(
	    img_path: Union[str, IO[bytes], NDArray[Any], Sequence[Union[str, NDArray[Any], IO[bytes]]]],
	    model_name: str = "VGG-Face",
	    enforce_detection: bool = True,
	    detector_backend: str = "opencv",
	    align: bool = True,
	    expand_percentage: int = 0,
	    normalization: str = "base",
	    anti_spoofing: bool = False,
	    max_faces: Optional[int] = None,
	    l2_normalize: bool = False,
	    minmax_normalize: bool = False,
	    return_face: bool = False,
	    cryptosystem: Optional[LightPHE] = None,
	) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
	    """
	    Represent facial images as multi-dimensional vector embeddings.

	    Args:
	        img_path (str, IO[bytes], np.ndarray, or a list of these):
	            The exact path to the image, a numpy array in BGR format,
	            a base64 encoded image, a binary file-like object, or a list of these.
	            A 4-dimensional numpy array is split along its first axis and handled
	            as a batch. Only a `list` (not other sequence types) is treated as a batch.
	            If the source image contains multiple faces,
	            the result will include information for each detected face.

	        model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
	            OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet

	        enforce_detection (boolean): If no face is detected in an image, raise an exception.
	            Default is True. Set to False to avoid the exception for low-resolution images.

	        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
	            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8n', 'yolov8m', 'yolov8l', 'yolov11n',
	            'yolov11s', 'yolov11m', 'yolov11l', 'yolov12n', 'yolov12s', 'yolov12m', 'yolov12l',
	            'centerface' or 'skip'.

	        align (boolean): Perform alignment based on the eye positions.

	        expand_percentage (int): expand detected facial area with a percentage (default is 0).

	        normalization (string): Normalize the input image before feeding it to the model.
	            Default is base. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace

	        anti_spoofing (boolean): Flag to enable anti spoofing (default is False).

	        max_faces (int): Set a limit on the number of faces to be processed (default is None).
	            When more faces are found, the ones with the largest facial area are kept.

	        l2_normalize (bool): Flag to enable L2 normalization (unit vector normalization)
	            of the output embeddings. Applied after min-max normalization when both are set.

	        minmax_normalize (bool): Flag to enable min-max normalization of the output embeddings
	            to the range [0, 1].

	        return_face (bool): If True, the face image that was fed to the model (i.e. after
	            resizing and input normalization) is also returned along with the embedding.
	            Default is False.

	        cryptosystem (LightPHE): An instance of a partially homomorphic encryption system
	            to encrypt the output embeddings. If provided, the embeddings will be encrypted
	            using the specified cryptosystem. Then, you will be able to perform homomorphic
	            operations on the encrypted embeddings without decrypting them first.
	            Check out the repo to find out more: https://github.com/serengil/lightphe

	    Returns:
	        results (List[Dict[str, Any]] or List[List[Dict[str, Any]]]): A list of dictionaries,
	            one per face. The result becomes a list of such lists (one inner list per input
	            image) if a batch input with other than exactly one image is passed.
	            An empty list is returned when there is no face to represent
	            (e.g. an empty input list). Each dictionary contains the following fields:

	        - embedding (List[float]): Multidimensional vector representing facial features.
	            The number of dimensions varies based on the reference model
	            (e.g., FaceNet returns 128 dimensions, VGG-Face returns 4096 dimensions).
	        - facial_area (dict): Detected facial area by face detection in dictionary format.
	            Contains 'x' and 'y' as the left-corner point, and 'w' and 'h'
	            as the width and height. If `detector_backend` is set to 'skip', it represents
	            the full image area and is nonsensical.
	        - face_confidence (float): Confidence score of face detection. If `detector_backend`
	            is set to 'skip', the confidence will be 0 and is nonsensical.
	        - face (np.ndarray): The pre-processed face image. This field is included only if
	            `return_face` is True.
	        - encrypted_embedding (List[Any]): Encrypted multidimensional vector representing
	            facial features. This field is included only if a `cryptosystem` is provided.

	    Raises:
	        ValueError: If `detector_backend` is 'skip' and a loaded image is not 3 dimensional.
	        SpoofDetected: If `anti_spoofing` is enabled and a face is flagged as not real.
	    """
	    model: FacialRecognition = modeling.build_model(
	        task="facial_recognition", model_name=model_name
	    )

	    # Handle list of image paths or 4D numpy array
	    images: List[Any]
	    if isinstance(img_path, list):
	        images = img_path
	    elif isinstance(img_path, np.ndarray) and img_path.ndim == 4:
	        # iterating a 4D array yields one 3D image per entry of the first axis
	        images = list(img_path)
	    else:
	        images = [img_path]

	    # the expected model input size does not depend on the image, so read it once
	    target_size = model.input_shape

	    batch_images: List[Any] = []
	    batch_regions: List[Any] = []
	    batch_confidences: List[Any] = []
	    # batch_indexes[k] is the position in `images` that the k-th collected face came from
	    batch_indexes: List[int] = []

	    for idx, single_img_path in enumerate(images):
	        img_objs: List[Dict[str, Any]]
	        # we have run pre-process in verification. so, skip if it is coming from verify.
	        if detector_backend != "skip":
	            # Images are returned in RGB format.
	            img_objs = cast(
	                List[Dict[str, Any]],
	                detection.extract_faces(
	                    img_path=single_img_path,
	                    detector_backend=detector_backend,
	                    grayscale=False,
	                    enforce_detection=enforce_detection,
	                    align=align,
	                    expand_percentage=expand_percentage,
	                    anti_spoofing=anti_spoofing,
	                    max_faces=max_faces,
	                ),
	            )
	        else:  # skip
	            # Try load. If load error, will raise exception internal
	            img, _ = image_utils.load_image(single_img_path)

	            if len(img.shape) != 3:
	                raise ValueError(f"Input img must be 3 dimensional but it is {img.shape}")

	            # Convert to RGB format to keep compatibility with `extract_faces`.
	            img = img[:, :, ::-1]

	            # numpy image arrays are laid out as (rows, columns, channels),
	            # i.e. (height, width, channels)
	            height, width = img.shape[0], img.shape[1]

	            # make dummy region and confidence to keep compatibility with `extract_faces`
	            img_objs = [
	                {
	                    "face": img,
	                    "facial_area": {"x": 0, "y": 0, "w": width, "h": height},
	                    "confidence": 0,
	                }
	            ]
	        # ---------------------------------

	        if max_faces is not None and max_faces < len(img_objs):
	            # sort as largest facial areas come first
	            img_objs = sorted(
	                img_objs,
	                key=lambda img_obj: img_obj["facial_area"]["w"] * img_obj["facial_area"]["h"],
	                reverse=True,
	            )
	            # discard rest of the items
	            img_objs = img_objs[0:max_faces]

	        for img_obj in img_objs:
	            # a missing "is_real" key is treated as a real face
	            if anti_spoofing is True and img_obj.get("is_real", True) is False:
	                raise SpoofDetected("Spoof detected in the given image.")

	            # rgb to bgr
	            face_img = img_obj["face"][:, :, ::-1]

	            # resize to expected shape of ml model
	            face_img = preprocessing.resize_image(
	                img=face_img,
	                # thanks to DeepId (!)
	                target_size=(target_size[1], target_size[0]),
	            )

	            # custom normalization
	            face_img = preprocessing.normalize_input(img=face_img, normalization=normalization)

	            batch_images.append(face_img)
	            batch_regions.append(img_obj["facial_area"])
	            batch_confidences.append(img_obj["confidence"])
	            batch_indexes.append(idx)

	    # np.concatenate raises on an empty sequence; with nothing to embed (empty input list
	    # or no face kept) return empty results shaped like the regular output instead
	    if not batch_images:
	        empty_results: List[List[Dict[str, Any]]] = [[] for _ in images]
	        return empty_results[0] if len(images) == 1 else empty_results

	    # Convert list of images to a numpy array for batch processing
	    # NOTE(review): assumes each pre-processed image carries a leading batch axis of
	    # size 1, so that concatenation on axis 0 yields one row per face - confirm
	    batch_images_np = np.concatenate(batch_images, axis=0)

	    # Forward pass through the model for the entire batch
	    embeddings = model.forward(batch_images_np)

	    if minmax_normalize:
	        embeddings = normalize_embedding_minmax(model_name, embeddings)

	    if l2_normalize:
	        embeddings = normalize_embedding_l2(embeddings)

	    encrypted_embeddings = encrypt_embeddings(embeddings, cryptosystem)

	    # With exactly one face, the embeddings object itself is used as that face's embedding
	    # rather than being indexed (presumably the model returns a flat vector for a batch
	    # of one - verify against FacialRecognition.forward).
	    is_single_face = len(batch_images) == 1

	    # group the per-face results by the index of the image they belong to
	    resp_objs_dict: Dict[int, List[Dict[str, Any]]] = defaultdict(list)
	    for idy, batch_index in enumerate(batch_indexes):
	        resp_obj: Dict[str, Any] = {
	            "embedding": embeddings if is_single_face else embeddings[idy],
	            "facial_area": batch_regions[idy],
	            "face_confidence": batch_confidences[idy],
	        }

	        if return_face:
	            resp_obj["face"] = batch_images_np[idy]
	        if cryptosystem is not None and encrypted_embeddings is not None:
	            resp_obj["encrypted_embedding"] = (
	                encrypted_embeddings if is_single_face else encrypted_embeddings[idy]
	            )
	        resp_objs_dict[batch_index].append(resp_obj)

	    # images without any collected face get an empty list (defaultdict behaviour)
	    resp_objs = [resp_objs_dict[idx] for idx in range(len(images))]

	    return resp_objs[0] if len(images) == 1 else resp_objs