File size: 14,303 Bytes
677c57e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 |
# built-in dependencies
import time
from typing import Any, Dict, Union, List, Tuple
# 3rd party dependencies
import numpy as np
# project dependencies
from deepface.modules import representation, detection, modeling
from deepface.models.FacialRecognition import FacialRecognition
from deepface.commons import logger as log
# Module-level logger obtained from the project's logger helper; verify() uses it
# to emit the warning about pre-calculated embeddings.
logger = log.get_singletonish_logger()
def verify(
    img1_path: Union[str, np.ndarray, List[float]],
    img2_path: Union[str, np.ndarray, List[float]],
    model_name: str = "VGG-Face",
    detector_backend: str = "opencv",
    distance_metric: str = "cosine",
    enforce_detection: bool = True,
    align: bool = True,
    expand_percentage: int = 0,
    normalization: str = "base",
    silent: bool = False,
) -> Dict[str, Any]:
    """
    Verify if an image pair represents the same person or different persons.

    The verification function converts facial images to vectors and calculates the similarity
    between those vectors. Vectors of images of the same person should exhibit higher similarity
    (or lower distance) than vectors of images of different persons. When an image yields more
    than one face, every face of the first image is compared with every face of the second
    image and the pair with the smallest distance is reported.

    Args:
        img1_path (str or np.ndarray or List[float]): Path to the first image.
            Accepts exact image path as a string, numpy array (BGR), base64 encoded images
            or pre-calculated embeddings.

        img2_path (str or np.ndarray or List[float]): Path to the second image.
            Accepts exact image path as a string, numpy array (BGR), base64 encoded images
            or pre-calculated embeddings.

        model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
            OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet (default is VGG-Face).

        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8', 'centerface' or 'skip'
            (default is opencv)

        distance_metric (string): Metric for measuring similarity. Options: 'cosine',
            'euclidean', 'euclidean_l2' (default is cosine).

        enforce_detection (boolean): If no face is detected in an image, raise an exception.
            Set to False to avoid the exception for low-resolution images (default is True).

        align (bool): Flag to enable face alignment (default is True).

        expand_percentage (int): expand detected facial area with a percentage (default is 0).

        normalization (string): Normalize the input image before feeding it to the model.
            Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base)

        silent (boolean): Suppress or allow some log messages for a quieter analysis process
            (default is False).

    Returns:
        result (dict): A dictionary containing verification results.

        - 'verified' (bool): Indicates whether the images represent the same person (True)
            or different persons (False). True when distance <= threshold.

        - 'distance' (float): The distance measure between the face vectors.
            A lower distance indicates higher similarity.

        - 'threshold' (float): The maximum threshold used for verification.
            If the distance is at or below this threshold, the images are considered a match.

        - 'model' (str): The chosen face recognition model.

        - 'detector_backend' (str): The chosen face detector backend.

        - 'similarity_metric' (str): The chosen similarity metric for measuring distances.

        - 'facial_areas' (dict): Regions of interest for the best matching faces.
            - 'img1': region of interest for the first image.
            - 'img2': region of interest for the second image.
            For an input given as a pre-calculated embedding the region is a dict whose
            'x', 'y', 'w', 'h', 'left_eye' and 'right_eye' values are all None.

        - 'time' (float): Time taken for the verification process in seconds,
            rounded to two decimals.

    Raises:
        ValueError: if a pre-calculated embedding contains non-float items or has the wrong
            number of dimensions for the model, if face/embedding extraction raises a
            ValueError for one of the images, or if distance_metric is not supported.
    """

    tic = time.time()

    model: FacialRecognition = modeling.build_model(model_name)
    dims = model.output_shape

    img1_embeddings, img1_facial_areas = _embeddings_and_areas(
        img_path=img1_path,
        arg_name="img1_path",
        ordinal="1st",
        model_name=model_name,
        dims=dims,
        silent=silent,
        detector_backend=detector_backend,
        enforce_detection=enforce_detection,
        align=align,
        expand_percentage=expand_percentage,
        normalization=normalization,
    )

    img2_embeddings, img2_facial_areas = _embeddings_and_areas(
        img_path=img2_path,
        arg_name="img2_path",
        ordinal="2nd",
        model_name=model_name,
        dims=dims,
        silent=silent,
        detector_backend=detector_backend,
        enforce_detection=enforce_detection,
        align=align,
        expand_percentage=expand_percentage,
        normalization=normalization,
    )

    # placeholder reported for inputs that were given as pre-calculated embeddings
    no_facial_area = {
        "x": None,
        "y": None,
        "w": None,
        "h": None,
        "left_eye": None,
        "right_eye": None,
    }

    # compare every face of img1 against every face of img2
    distances = []
    facial_areas = []
    for idx, img1_embedding in enumerate(img1_embeddings):
        for idy, img2_embedding in enumerate(img2_embeddings):
            distance = find_distance(img1_embedding, img2_embedding, distance_metric)
            distances.append(distance)
            facial_areas.append(
                (img1_facial_areas[idx] or no_facial_area, img2_facial_areas[idy] or no_facial_area)
            )

    # find the face pair with minimum distance; a single index keeps the reported
    # distance and the reported facial areas tied to the same pair
    threshold = find_threshold(model_name, distance_metric)
    best_idx = int(np.argmin(distances))
    distance = float(distances[best_idx])
    best_areas = facial_areas[best_idx]

    toc = time.time()

    resp_obj = {
        "verified": distance <= threshold,
        "distance": distance,
        "threshold": threshold,
        "model": model_name,
        "detector_backend": detector_backend,
        "similarity_metric": distance_metric,
        "facial_areas": {"img1": best_areas[0], "img2": best_areas[1]},
        "time": round(toc - tic, 2),
    }

    return resp_obj


def _embeddings_and_areas(
    img_path: Union[str, np.ndarray, List[float]],
    arg_name: str,
    ordinal: str,
    model_name: str,
    dims: int,
    silent: bool,
    detector_backend: str,
    enforce_detection: bool,
    align: bool,
    expand_percentage: int,
    normalization: str,
) -> Tuple[List[List[float]], List[Union[dict, None]]]:
    """
    Resolve one verify() input into its embeddings and matching facial areas.

    Args:
        img_path: the image input, or a list of floats holding a pre-calculated embedding.
        arg_name (str): name of the verify() argument, used in error messages.
        ordinal (str): "1st" or "2nd", used in the warning message.
        model_name (str): face recognition model name.
        dims (int): embedding size expected for the model.
        silent (bool): when False, warn that a pre-calculated embedding was passed.
        detector_backend, enforce_detection, align, expand_percentage, normalization:
            forwarded unchanged to the face/embedding extraction step.

    Returns:
        embeddings (List[List[float]]) and facial areas (List[dict or None]); the facial
        area is None for a pre-calculated embedding.

    Raises:
        ValueError: on a malformed pre-calculated embedding, or when the extraction step
            raises a ValueError (re-raised with the argument name, original error chained).
    """
    if isinstance(img_path, list):
        # given image is already pre-calculated embedding
        if not all(isinstance(dim, float) for dim in img_path):
            raise ValueError(
                f"When passing {arg_name} as a list, ensure that all its items are of type float."
            )

        if silent is False:
            logger.warn(
                f"You passed {ordinal} image as pre-calculated embeddings."
                f" Please ensure that embeddings have been calculated for the {model_name} model."
            )

        if len(img_path) != dims:
            raise ValueError(
                f"embeddings of {model_name} should have {dims} dimensions,"
                f" but it has {len(img_path)} dimensions input"
            )

        return [img_path], [None]

    try:
        return __extract_faces_and_embeddings(
            img_path=img_path,
            model_name=model_name,
            detector_backend=detector_backend,
            enforce_detection=enforce_detection,
            align=align,
            expand_percentage=expand_percentage,
            normalization=normalization,
        )
    except ValueError as err:
        raise ValueError(f"Exception while processing {arg_name}") from err
def __extract_faces_and_embeddings(
    img_path: Union[str, np.ndarray],
    model_name: str = "VGG-Face",
    detector_backend: str = "opencv",
    enforce_detection: bool = True,
    align: bool = True,
    expand_percentage: int = 0,
    normalization: str = "base",
) -> Tuple[List[List[float]], List[dict]]:
    """
    Detect the faces in the given image and compute one embedding per detected face.

    Args:
        img_path (str or np.ndarray): image input handed to the face detection step.
        model_name (str): face recognition model used to compute the embeddings.
        detector_backend (str): face detector backend used for the detection step.
        enforce_detection (bool): forwarded to both the detection and representation steps.
        align (bool): forwarded to both the detection and representation steps.
        expand_percentage (int): forwarded to the detection step.
        normalization (str): forwarded to the representation step.

    Returns:
        embeddings (List[List[float]]): one embedding per detected face
        facial areas (List[dict]): the "facial_area" entry of each detected face,
            in the same order as the embeddings
    """
    detected_faces = detection.extract_faces(
        img_path=img_path,
        detector_backend=detector_backend,
        grayscale=False,
        enforce_detection=enforce_detection,
        align=align,
        expand_percentage=expand_percentage,
    )

    vectors = []
    regions = []
    for face_obj in detected_faces:
        # the face is already cropped, so the detector is skipped here and the
        # first (only expected) representation is the one we take
        representations = representation.represent(
            img_path=face_obj["face"],
            model_name=model_name,
            enforce_detection=enforce_detection,
            detector_backend="skip",
            align=align,
            normalization=normalization,
        )
        first_representation = representations[0]
        vectors.append(first_representation["embedding"])
        regions.append(face_obj["facial_area"])

    return vectors, regions
def find_cosine_distance(
    source_representation: Union[np.ndarray, list], test_representation: Union[np.ndarray, list]
) -> np.float64:
    """
    Compute the cosine distance (1 - cosine similarity) of two vectors.

    Args:
        source_representation (np.ndarray or list): 1st vector
        test_representation (np.ndarray or list): 2nd vector

    Returns:
        distance (np.float64): calculated cosine distance
    """
    # lists are converted to arrays; any other input is used as given
    src = (
        np.array(source_representation)
        if isinstance(source_representation, list)
        else source_representation
    )
    tst = (
        np.array(test_representation)
        if isinstance(test_representation, list)
        else test_representation
    )

    dot_product = np.matmul(np.transpose(src), tst)
    src_squared_sum = np.sum(np.multiply(src, src))
    tst_squared_sum = np.sum(np.multiply(tst, tst))
    norm_product = np.sqrt(src_squared_sum) * np.sqrt(tst_squared_sum)

    return 1 - (dot_product / norm_product)
def find_euclidean_distance(
    source_representation: Union[np.ndarray, list], test_representation: Union[np.ndarray, list]
) -> np.float64:
    """
    Compute the euclidean distance of two vectors.

    Args:
        source_representation (np.ndarray or list): 1st vector
        test_representation (np.ndarray or list): 2nd vector

    Returns:
        distance (np.float64): calculated euclidean distance
    """
    # lists are converted to arrays; any other input is used as given
    src = (
        np.array(source_representation)
        if isinstance(source_representation, list)
        else source_representation
    )
    tst = (
        np.array(test_representation)
        if isinstance(test_representation, list)
        else test_representation
    )

    delta = src - tst
    squared_sum = np.sum(np.multiply(delta, delta))
    return np.sqrt(squared_sum)
def l2_normalize(x: Union[np.ndarray, list]) -> np.ndarray:
    """
    Scale the input vector by the inverse of its l2 norm.

    Args:
        x (np.ndarray or list): given vector

    Returns:
        y (np.ndarray): l2 normalized vector
    """
    # a list is converted to an array; any other input is used as given
    vector = np.array(x) if isinstance(x, list) else x
    l2_norm = np.sqrt(np.sum(np.multiply(vector, vector)))
    return vector / l2_norm
def find_distance(
    alpha_embedding: Union[np.ndarray, list],
    beta_embedding: Union[np.ndarray, list],
    distance_metric: str,
) -> np.float64:
    """
    Wrapper to find distance between vectors according to the given distance metric

    Args:
        alpha_embedding (np.ndarray or list): 1st vector
        beta_embedding (np.ndarray or list): 2nd vector
        distance_metric (str): one of 'cosine', 'euclidean' or 'euclidean_l2'.
            For 'euclidean_l2' both vectors are l2 normalized before the
            euclidean distance is taken.

    Returns:
        distance (np.float64): distance calculated with the requested metric

    Raises:
        ValueError: if distance_metric is not one of the supported options
    """
    if distance_metric == "cosine":
        distance = find_cosine_distance(alpha_embedding, beta_embedding)
    elif distance_metric == "euclidean":
        distance = find_euclidean_distance(alpha_embedding, beta_embedding)
    elif distance_metric == "euclidean_l2":
        distance = find_euclidean_distance(
            l2_normalize(alpha_embedding), l2_normalize(beta_embedding)
        )
    else:
        # build one message string; passing the metric as a second exception
        # argument would render the error as a tuple repr
        raise ValueError(f"Invalid distance_metric passed - {distance_metric}")
    return distance
def find_threshold(model_name: str, distance_metric: str) -> float:
    """
    Look up the pre-tuned decision threshold of a model and distance metric pair

    Args:
        model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
            OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet.
            Any other name falls back to a generic threshold table.
        distance_metric (str): distance metric name. Options are cosine, euclidean
            and euclidean_l2. Any other name yields 0.4.

    Returns:
        threshold (float): threshold value for that model name and distance metric
            pair. verify() classifies distances at or below it as the same person.
    """
    # used when the model name has no tuned entry below
    generic_thresholds = {"cosine": 0.40, "euclidean": 0.55, "euclidean_l2": 0.75}

    tuned_thresholds = {
        # 4096d - tuned with LFW
        # (the 2622d variant used cosine 0.40, euclidean 0.60, euclidean_l2 0.86)
        "VGG-Face": {"cosine": 0.68, "euclidean": 1.17, "euclidean_l2": 1.17},
        "Facenet": {"cosine": 0.40, "euclidean": 10, "euclidean_l2": 0.80},
        "Facenet512": {"cosine": 0.30, "euclidean": 23.56, "euclidean_l2": 1.04},
        "ArcFace": {"cosine": 0.68, "euclidean": 4.15, "euclidean_l2": 1.13},
        "Dlib": {"cosine": 0.07, "euclidean": 0.6, "euclidean_l2": 0.4},
        "SFace": {"cosine": 0.593, "euclidean": 10.734, "euclidean_l2": 1.055},
        "OpenFace": {"cosine": 0.10, "euclidean": 0.55, "euclidean_l2": 0.55},
        "DeepFace": {"cosine": 0.23, "euclidean": 64, "euclidean_l2": 0.64},
        "DeepID": {"cosine": 0.015, "euclidean": 45, "euclidean_l2": 0.17},
        "GhostFaceNet": {"cosine": 0.65, "euclidean": 35.71, "euclidean_l2": 1.10},
    }

    if model_name in tuned_thresholds:
        per_metric = tuned_thresholds[model_name]
    else:
        per_metric = generic_thresholds

    if distance_metric in per_metric:
        return per_metric[distance_metric]
    return 0.4
|