Spaces:
Running
Running
| # built-in dependencies | |
| import time | |
| from typing import Any, Dict, Optional, Union, List, Tuple, IO, cast | |
| import math | |
| # 3rd party dependencies | |
| import numpy as np | |
| from numpy.typing import NDArray | |
| # project dependencies | |
| from deepface.modules import representation, detection, modeling | |
| from deepface.models.FacialRecognition import FacialRecognition | |
| from deepface.commons.logger import Logger | |
| from deepface.config.confidence import confidences | |
| from deepface.config.threshold import thresholds | |
| from deepface.modules.exceptions import ( | |
| SpoofDetected, | |
| DimensionMismatchError, | |
| DataTypeError, | |
| InvalidEmbeddingsShapeError, | |
| ) | |
| logger = Logger() | |
| # pylint: disable=too-many-positional-arguments, no-else-return | |
def verify(
    img1_path: Union[str, NDArray[Any], List[float], IO[bytes]],
    img2_path: Union[str, NDArray[Any], List[float], IO[bytes]],
    model_name: str = "VGG-Face",
    detector_backend: str = "opencv",
    distance_metric: str = "cosine",
    enforce_detection: bool = True,
    align: bool = True,
    expand_percentage: int = 0,
    normalization: str = "base",
    silent: bool = False,
    threshold: Optional[float] = None,
    anti_spoofing: bool = False,
) -> Dict[str, Any]:
    """
    Verify if an image pair represents the same person or different persons.

    The verification function converts facial images to vectors and calculates the similarity
    between those vectors. Vectors of images of the same person should exhibit higher similarity
    (or lower distance) than vectors of images of different persons.

    Args:
        img1_path (str or np.ndarray or List[float]): Path to the first image.
            Accepts exact image path as a string, numpy array (BGR), base64 encoded images
            or pre-calculated embeddings.

        img2_path (str or np.ndarray or List[float]): Path to the second image.
            Accepts exact image path as a string, numpy array (BGR), base64 encoded images
            or pre-calculated embeddings.

        model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
            OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet (default is VGG-Face).

        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8n', 'yolov8m', 'yolov8l', 'yolov11n',
            'yolov11s', 'yolov11m', 'yolov11l', 'yolov12n', 'yolov12s', 'yolov12m', 'yolov12l'
            'centerface' or 'skip' (default is opencv)

        distance_metric (string): Metric for measuring similarity. Options: 'cosine',
            'euclidean', 'euclidean_l2', 'angular' (default is cosine).

        enforce_detection (boolean): If no face is detected in an image, raise an exception.
            Set to False to avoid the exception for low-resolution images (default is True).

        align (bool): Flag to enable face alignment (default is True).

        expand_percentage (int): expand detected facial area with a percentage (default is 0).

        normalization (string): Normalize the input image before feeding it to the model.
            Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base)

        silent (boolean): Suppress or allow some log messages for a quieter analysis process
            (default is False).

        threshold (float): Specify a threshold to determine whether a pair represents the same
            person or different individuals. This threshold is used for comparing distances.
            If left unset (None), the default pre-tuned threshold for the specified model name
            and distance metric is applied. An explicit 0.0 is honoured as given.

        anti_spoofing (boolean): Flag to enable anti spoofing (default is False).

    Returns:
        result (dict): A dictionary containing verification results.

        - 'verified' (bool): Indicates whether the images represent the same person (True)
            or different persons (False).

        - 'distance' (float): The smallest distance found between any face vector of the
            first input and any face vector of the second input.
            A lower distance indicates higher similarity.

        - 'threshold' (float): The maximum threshold used for verification.
            If the distance is at or below this threshold, the images are considered a match.

        - 'confidence' (float): Confidence score indicating the likelihood that the images
            represent the same person. The score is between 0 and 100, where higher values
            indicate greater confidence in the verification result. It is always derived
            from the pre-tuned threshold, even when a custom threshold is supplied.

        - 'model' (str): The chosen face recognition model.

        - 'detector_backend' (str): The chosen face detector backend.

        - 'similarity_metric' (str): The chosen similarity metric for measuring distances.

        - 'facial_areas' (dict): Rectangular regions of interest for faces in both images.
            - 'img1': {'x': int, 'y': int, 'w': int, 'h': int}
                    Region of interest for the first image.
            - 'img2': {'x': int, 'y': int, 'w': int, 'h': int}
                    Region of interest for the second image.
            All values are None for an input given as a pre-calculated embedding.

        - 'time' (float): Time taken for the verification process in seconds.

    Raises:
        DataTypeError: a list input contains items that are neither float nor int.
        DimensionMismatchError: a pre-calculated embedding does not have the model's
            output dimensionality.
        ValueError: face/embedding extraction failed for one of the inputs, or the model /
            distance metric pair has no pre-tuned threshold.
    """
    tic = time.time()

    model: FacialRecognition = modeling.build_model(
        task="facial_recognition", model_name=model_name
    )
    dims = model.output_shape

    # placeholder facial area used when an input is already an embedding
    no_facial_area = {
        "x": None,
        "y": None,
        "w": None,
        "h": None,
        "left_eye": None,
        "right_eye": None,
    }

    def extract_embeddings_and_facial_areas(
        img_path: Union[str, NDArray[Any], List[float], IO[bytes]], index: int
    ) -> Tuple[List[List[float]], List[Dict[str, Any]]]:
        """
        Extracts facial embeddings and corresponding facial areas from an
        image or returns pre-calculated embeddings.

        Depending on the type of img_path, the function either extracts
        facial embeddings from the provided image
        (via a path or NumPy array) or verifies that the input is a list of
        pre-calculated embeddings and validates them.

        Args:
            img_path (Union[str, np.ndarray, List[float]]):
                - A string representing the file path to an image,
                - A NumPy array containing the image data,
                - Or a list of pre-calculated embedding values (of type `float`).
                - Or a file-like object containing image data (e.g., bytes).
            index (int): An index value used in error messages and logging
                to identify the number of the image.

        Returns:
            Tuple[List[List[float]], List[dict]]:
                - A list containing lists of facial embeddings for each detected face.
                - A list of dictionaries where each dictionary contains facial area information.
        """
        if isinstance(img_path, list):
            # given image is already pre-calculated embedding
            if not all(isinstance(dim, (float, int)) for dim in img_path):
                raise DataTypeError(
                    f"When passing img{index}_path as a list,"
                    " ensure that all its items are of type float."
                )

            if silent is False:
                logger.warn(
                    f"You passed {index}-th image as pre-calculated embeddings."
                    "Please ensure that embeddings have been calculated"
                    f" for the {model_name} model."
                )

            if len(img_path) != dims:
                raise DimensionMismatchError(
                    f"embeddings of {model_name} should have {dims} dimensions,"
                    f" but {index}-th image has {len(img_path)} dimensions input"
                )

            img_embeddings = [img_path]
            img_facial_areas = [no_facial_area]
        else:
            try:
                img_embeddings, img_facial_areas = __extract_faces_and_embeddings(
                    img_path=img_path,
                    model_name=model_name,
                    detector_backend=detector_backend,
                    enforce_detection=enforce_detection,
                    align=align,
                    expand_percentage=expand_percentage,
                    normalization=normalization,
                    anti_spoofing=anti_spoofing,
                )
            except ValueError as err:
                # re-raise with the image index so the caller knows which input failed
                raise ValueError(f"Exception while processing img{index}_path") from err
        return img_embeddings, img_facial_areas

    img1_embeddings, img1_facial_areas = extract_embeddings_and_facial_areas(img1_path, 1)
    img2_embeddings, img2_facial_areas = extract_embeddings_and_facial_areas(img2_path, 2)

    # find the face pair with minimum distance
    min_distance, min_idx, min_idy = float("inf"), None, None
    for idx, img1_embedding in enumerate(img1_embeddings):
        for idy, img2_embedding in enumerate(img2_embeddings):
            distance: float = float(
                cast(np.float64, find_distance(img1_embedding, img2_embedding, distance_metric))
            )
            if distance < min_distance:
                min_distance, min_idx, min_idy = distance, idx, idy

    pretuned_threshold = find_threshold(model_name, distance_metric)
    # compare against None explicitly so that a caller-supplied 0.0 is not discarded
    if threshold is None:
        threshold = pretuned_threshold

    distance = float(min_distance)

    # confidence is calibrated against the pre-tuned threshold, not the custom one
    confidence = find_confidence(
        distance=distance,
        model_name=model_name,
        distance_metric=distance_metric,
        verified=distance <= pretuned_threshold,
    )

    # min_idx / min_idy stay None when no pair was compared
    facial_areas = (
        no_facial_area if min_idx is None else img1_facial_areas[min_idx],
        no_facial_area if min_idy is None else img2_facial_areas[min_idy],
    )

    toc = time.time()

    resp_obj = {
        "verified": distance <= threshold,
        "distance": distance,
        "threshold": threshold,
        "confidence": confidence,
        "model": model_name,
        "detector_backend": detector_backend,
        "similarity_metric": distance_metric,
        "facial_areas": {"img1": facial_areas[0], "img2": facial_areas[1]},
        "time": round(toc - tic, 2),
    }

    return resp_obj
def __extract_faces_and_embeddings(
    img_path: Union[str, NDArray[Any], IO[bytes]],
    model_name: str = "VGG-Face",
    detector_backend: str = "opencv",
    enforce_detection: bool = True,
    align: bool = True,
    expand_percentage: int = 0,
    normalization: str = "base",
    anti_spoofing: bool = False,
) -> Tuple[List[List[float]], List[Dict[str, Any]]]:
    """
    Detect the faces in a single image and compute one embedding per detected face.

    Args:
        img_path: image input forwarded unchanged to detection.extract_faces.
        model_name: face recognition model handed to representation.represent.
        detector_backend: face detector used for the detection step.
        enforce_detection: forwarded to both the detection and representation calls.
        align: forwarded to both the detection and representation calls.
        expand_percentage: forwarded to the detection call.
        normalization: forwarded to the representation call.
        anti_spoofing: when True, a face flagged with is_real == False aborts processing.

    Returns:
        embeddings (List[List[float]]): one embedding per detected face.
        facial areas (List[dict]): the "facial_area" entry of each detected face,
            in the same order as the embeddings.

    Raises:
        SpoofDetected: anti_spoofing is True and a detected face has is_real set to False.
    """
    detected_faces: List[Dict[str, Any]] = cast(
        List[Dict[str, Any]],
        detection.extract_faces(
            img_path=img_path,
            detector_backend=detector_backend,
            grayscale=False,
            enforce_detection=enforce_detection,
            align=align,
            expand_percentage=expand_percentage,
            anti_spoofing=anti_spoofing,
        ),
    )

    embeddings: List[List[float]] = []
    facial_areas: List[Dict[str, Any]] = []

    # represent every detected face individually
    for face_obj in detected_faces:
        if anti_spoofing is True:
            # a missing "is_real" flag is treated as a real face
            if face_obj.get("is_real", True) is False:
                raise SpoofDetected("Spoof detected in given image.")

        # the last axis is reversed before the direct representation call
        # (presumably a colour channel order flip - confirm against extract_faces)
        face_pixels = face_obj["face"][:, :, ::-1]

        # detection is skipped because the face crop was already extracted above
        representations = cast(
            List[Dict[str, Any]],
            representation.represent(
                img_path=face_pixels,
                model_name=model_name,
                enforce_detection=enforce_detection,
                detector_backend="skip",
                align=align,
                normalization=normalization,
            ),
        )

        # a single pre-cropped face was passed in, so only the first item is used
        embeddings.append(representations[0]["embedding"])
        facial_areas.append(face_obj["facial_area"])

    return embeddings, facial_areas
def find_cosine_distance(
    source_representation: Union[NDArray[Any], List[float]],
    test_representation: Union[NDArray[Any], List[float]],
) -> Union[np.float64, NDArray[Any]]:
    """
    Compute the cosine distance (1 - cosine similarity) between embeddings.

    Args:
        source_representation (np.ndarray or list): 1st vector or batch of vectors.
        test_representation (np.ndarray or list): 2nd vector or batch of vectors.

    Returns:
        np.float64 or np.ndarray: Calculated cosine distance(s).
            A np.float64 for a pair of 1D embeddings; for a pair of 2D batches an array
            with one row per test embedding and one column per source embedding.

    Raises:
        InvalidEmbeddingsShapeError: the inputs are not both 1D or both 2D.
    """
    source = np.asarray(source_representation)
    target = np.asarray(test_representation)
    ranks = (source.ndim, target.ndim)

    if ranks == (1, 1):
        # single pair: the raw norms are used here, no epsilon is added
        inner = np.dot(source, target)
        source_length = np.linalg.norm(source)
        target_length = np.linalg.norm(target)
        return cast(np.float64, 1 - inner / (source_length * target_length))

    if ranks == (2, 2):
        # batches: normalise every row, then one matrix product yields all pairs
        unit_source = l2_normalize(source, axis=1)  # (N, D)
        unit_target = l2_normalize(target, axis=1)  # (M, D)
        pairwise_similarity = np.dot(unit_target, unit_source.T)  # (M, N)
        return cast(NDArray[Any], 1 - pairwise_similarity)

    raise InvalidEmbeddingsShapeError(
        f"Embeddings must be 1D or 2D, but received "
        f"source shape: {source.shape}, test shape: {target.shape}"
    )
def find_angular_distance(
    source_representation: Union[NDArray[Any], List[float]],
    test_representation: Union[NDArray[Any], List[float]],
) -> Union[np.float64, NDArray[Any]]:
    """
    Find angular distance between two vectors or batches of vectors.

    The angular distance is arccos(cosine similarity) / pi, so it lies in [0, 1].

    Args:
        source_representation (np.ndarray or list): 1st vector or batch of vectors.
        test_representation (np.ndarray or list): 2nd vector or batch of vectors.

    Returns:
        np.float64 or np.ndarray: angular distance(s).
            Returns a np.float64 for single embeddings and np.ndarray for batch embeddings
            (one row per test embedding, one column per source embedding).

    Raises:
        ValueError: the inputs are not both 1D or both 2D.
    """
    # calculate cosine similarity first
    # then convert to angular distance
    source_representation = np.asarray(source_representation)
    test_representation = np.asarray(test_representation)

    if source_representation.ndim == 1 and test_representation.ndim == 1:
        # single embedding
        dot_product = np.dot(source_representation, test_representation)
        source_norm = np.linalg.norm(source_representation)
        test_norm = np.linalg.norm(test_representation)
        similarity = dot_product / (source_norm * test_norm)
        # rounding can push the similarity of (near-)parallel vectors marginally
        # outside [-1, 1], where arccos is undefined and would return NaN
        similarity = np.clip(similarity, -1.0, 1.0)
        distances = np.arccos(similarity) / np.pi
        return cast(np.float64, distances)
    elif source_representation.ndim == 2 and test_representation.ndim == 2:
        # list of embeddings (batch)
        source_normed = l2_normalize(source_representation, axis=1)  # (N, D)
        test_normed = l2_normalize(test_representation, axis=1)  # (M, D)
        similarity = np.dot(test_normed, source_normed.T)  # (M, N)
        # same guard as the single-embedding case, applied element-wise
        similarity = np.clip(similarity, -1.0, 1.0)
        distances = np.arccos(similarity) / np.pi
        return cast(NDArray[Any], distances)
    else:
        raise ValueError(
            f"Embeddings must be 1D or 2D, but received "
            f"source shape: {source_representation.shape}, test shape: {test_representation.shape}"
        )
def find_euclidean_distance(
    source_representation: Union[NDArray[Any], List[float]],
    test_representation: Union[NDArray[Any], List[float]],
) -> Union[np.float64, NDArray[Any]]:
    """
    Compute the Euclidean (L2) distance between embeddings.

    Args:
        source_representation (np.ndarray or list): 1st vector or batch of vectors.
        test_representation (np.ndarray or list): 2nd vector or batch of vectors.

    Returns:
        np.float64 or np.ndarray: Euclidean distance(s).
            A np.float64 for a pair of 1D embeddings; for a pair of 2D batches an array
            with one row per test embedding and one column per source embedding.

    Raises:
        ValueError: the inputs are not both 1D or both 2D.
    """
    source = np.asarray(source_representation)
    target = np.asarray(test_representation)
    ranks = (source.ndim, target.ndim)

    if ranks == (1, 1):
        # single pair: norm of the difference vector
        return cast(np.float64, np.linalg.norm(source - target))

    if ranks == (2, 2):
        # broadcast (1, N, D) against (M, 1, D) to get every pairwise difference
        pairwise_diff = source[None, :, :] - target[:, None, :]  # (M, N, D)
        return cast(NDArray[Any], np.linalg.norm(pairwise_diff, axis=2))  # (M, N)

    raise ValueError(
        f"Embeddings must be 1D or 2D, but received "
        f"source shape: {source.shape}, test shape: {target.shape}"
    )
def l2_normalize(
    x: Union[NDArray[Any], List[float], List[List[float]]],
    axis: Union[int, None] = None,
    epsilon: float = 1e-10,
) -> NDArray[Any]:
    """
    Scale the input by its L2 norm.

    Args:
        x (np.ndarray or list): given vector (or batch of vectors)
        axis (int): axis along which the norm is taken; None uses the whole array
        epsilon (float): small constant added to the norm before dividing, which keeps
            the division defined for an all-zero input

    Returns:
        np.ndarray: l2 normalized vector
    """
    values = np.asarray(x)
    # keepdims lets the norm broadcast back against the input along the chosen axis
    lengths = np.linalg.norm(values, axis=axis, keepdims=True)
    scaled = values / (lengths + epsilon)
    return cast(NDArray[Any], scaled)
def find_distance(
    alpha_embedding: Union[NDArray[Any], List[float]],
    beta_embedding: Union[NDArray[Any], List[float]],
    distance_metric: str,
) -> Union[np.float64, NDArray[Any]]:
    """
    Wrapper to find the distance between vectors based on the specified distance metric.

    Args:
        alpha_embedding (np.ndarray or list): 1st vector or batch of vectors.
        beta_embedding (np.ndarray or list): 2nd vector or batch of vectors.
        distance_metric (str): The type of distance to compute
            ('cosine', 'euclidean', 'euclidean_l2', or 'angular').

    Returns:
        np.float64 or np.ndarray: The calculated distance(s), rounded to 6 decimals.

    Raises:
        ValueError: the embeddings do not share the same rank, the rank is not 1 or 2,
            or distance_metric is not one of the supported names.
    """
    # Convert inputs to numpy arrays if necessary
    alpha_embedding = np.asarray(alpha_embedding)
    beta_embedding = np.asarray(beta_embedding)

    # Ensure that both embeddings are either 1D or 2D
    if alpha_embedding.ndim != beta_embedding.ndim or alpha_embedding.ndim not in (1, 2):
        raise ValueError(
            f"Both embeddings must be either 1D or 2D, but received "
            f"alpha shape: {alpha_embedding.shape}, beta shape: {beta_embedding.shape}"
        )

    if distance_metric == "cosine":
        distance = find_cosine_distance(alpha_embedding, beta_embedding)
    elif distance_metric == "angular":
        distance = find_angular_distance(alpha_embedding, beta_embedding)
    elif distance_metric == "euclidean":
        distance = find_euclidean_distance(alpha_embedding, beta_embedding)
    elif distance_metric == "euclidean_l2":
        # normalise each vector first: whole array for 1D, row-wise for a batch
        axis = None if alpha_embedding.ndim == 1 else 1
        normalized_alpha = l2_normalize(alpha_embedding, axis=axis)
        normalized_beta = l2_normalize(beta_embedding, axis=axis)
        distance = find_euclidean_distance(normalized_alpha, normalized_beta)
    else:
        # a single formatted string; passing the metric as a second positional
        # argument would make the message render as a tuple
        raise ValueError(f"Invalid distance_metric passed - {distance_metric}")
    return np.round(distance, 6)
def find_threshold(model_name: str, distance_metric: str) -> float:
    """
    Look up the pre-tuned decision threshold for a model / distance metric pair.

    Args:
        model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
            OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet.
        distance_metric (str): distance metric name. Options are cosine, euclidean
            euclidean_l2 and angular.

    Returns:
        threshold (float): the value stored in the thresholds table for that pair.

    Raises:
        ValueError: the model has no entry in the thresholds table, or the model has
            no threshold for the requested distance metric.
    """
    per_metric = thresholds.get(model_name)
    if per_metric is None:
        raise ValueError(f"Model {model_name} is not supported. ")

    tuned_value = per_metric.get(distance_metric)
    if tuned_value is None:
        raise ValueError(
            f"Distance metric {distance_metric} is not available for model {model_name}. "
        )

    return tuned_value
def __sigmoid(z: float) -> float:
    """
    Evaluate the logistic sigmoid 1 / (1 + exp(-z)) without overflowing.

    The exponential is always taken of a non-positive number: exp(-z) when z is
    non-negative, and exp(z) otherwise (using the algebraically equal form
    exp(z) / (1 + exp(z))), so math.exp never receives a large positive argument.

    Args:
        z (float): Input value.

    Returns:
        float: Sigmoid output in the range [0, 1].
    """
    if z < 0:
        # negative input: exp(z) is at most 1, so it cannot overflow
        exp_z = math.exp(z)
        return exp_z / (1 + exp_z)

    # non-negative input: exp(-z) is at most 1
    return 1 / (1 + math.exp(-z))
def find_confidence(
    distance: float, model_name: str, distance_metric: str, verified: bool
) -> float:
    """
    Map a distance to a confidence score using the pre-built logistic regression
    parameters stored in the confidences table.

    The sigmoid output is rescaled so that pairs classified as the same person land in
    51-100 and pairs classified as different persons land in 0-49.

    Args:
        distance (float): distance between the two embeddings.
        model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
            OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet.
        distance_metric (str): distance metric name. Options are cosine, euclidean
            euclidean_l2 and angular.
        verified (bool): True if the images are classified as same person,
            False if different persons.

    Returns:
        confidence (float): confidence of the pair being the same person, rounded to
            2 decimals. A non-positive distance returns 100.0 (verified) or 0.0
            (not verified) directly. When no configuration exists for the model or the
            metric, the fallback is 51 (verified) or 49 (not verified).
    """
    if distance <= 0:
        return 100.0 if verified else 0.0

    # neutral fallback for model / metric pairs without regression parameters
    fallback = 51 if verified else 49

    model_config = confidences.get(model_name)
    if model_config is None:
        return fallback

    config = model_config.get(distance_metric)
    if config is None:
        return fallback

    # every parameter is read up front, whichever branch is taken below
    w, b, normalizer, max_true, min_true, max_false, min_false = (
        config[key]
        for key in (
            "w",
            "b",
            "normalizer",
            "denorm_max_true",
            "denorm_min_true",
            "denorm_max_false",
            "denorm_min_false",
        )
    )

    scaled_distance = distance / normalizer if normalizer > 1 else distance
    raw_confidence = 100 * __sigmoid(w * scaled_distance + b)

    # re-distribute the confidence between 0-49 for different persons, 51-100 for same persons
    if verified:
        source_low, source_high = min_true, max_true
        target_low, target_high = max(51, source_low), 100
    else:
        source_low, source_high = min_false, max_false
        target_low, target_high = 0, min(49, int(source_high))

    fraction = (raw_confidence - source_low) / (source_high - source_low)
    rescaled = fraction * (target_high - target_low) + target_low

    # keep the score on the side of 50 that matches the classification
    if verified:
        if rescaled < 51:
            rescaled = 51
    elif rescaled > 49:
        rescaled = 49

    # and never outside 0-100
    if rescaled < 0:
        rescaled = 0
    elif rescaled > 100:
        rescaled = 100

    return round(rescaled, 2)