datn-face-ai / deepface /modules /datastore.py
DaoManhDuc2004
Deploy DATN face AI server
b5d3a91
# built-in dependencies
import os
from typing import Any, Dict, IO, List, Union, Optional, cast
import uuid
import time
import math
import tempfile
# 3rd party dependencies
import pandas as pd
import numpy as np
from numpy.typing import NDArray
# project dependencies
from deepface.modules.database.types import Database
from deepface.modules.database.inventory import database_inventory
from deepface.modules.representation import represent
from deepface.modules.verification import (
find_angular_distance,
find_cosine_distance,
find_euclidean_distance,
l2_normalize as find_l2_normalize,
find_threshold,
find_confidence,
)
from deepface.commons.logger import Logger
# Module-level logger used by the functions in this module.
logger = Logger()
# pylint: disable=too-many-positional-arguments, no-else-return
def register(
    img: Union[str, NDArray[Any], IO[bytes], List[str], List[NDArray[Any]], List[IO[bytes]]],
    img_name: Optional[str] = None,
    model_name: str = "VGG-Face",
    detector_backend: str = "opencv",
    enforce_detection: bool = True,
    align: bool = True,
    l2_normalize: bool = False,
    expand_percentage: int = 0,
    normalization: str = "base",
    anti_spoofing: bool = False,
    database_type: str = "postgres",
    connection_details: Optional[Union[Dict[str, Any], str]] = None,
    connection: Any = None,
) -> Dict[str, Any]:
    """
    Register identities to database for face recognition
    Args:
        img (str or np.ndarray or IO[bytes] or list): The exact path to the image, a numpy array
            in BGR format, a file object that supports at least `.read` and is opened in binary
            mode, or a base64 encoded image. If a list is provided, each element should be a
            string or numpy array representing an image, and the function will process images
            in batch.
        img_name (optional str): image name to store in db. If not provided, the given img is
            used when it is a string ending with .jpg, .jpeg or .png (case-insensitive);
            otherwise a random UUID is generated for each detected face.
        model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
            OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet
            (default is VGG-Face).
        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8n', 'yolov8m', 'yolov8l', 'yolov11n',
            'yolov11s', 'yolov11m', 'yolov11l', 'yolov12n', 'yolov12s', 'yolov12m', 'yolov12l',
            'centerface' or 'skip' (default is opencv).
        enforce_detection (boolean): If no face is detected in an image, raise an exception.
            Set to False to avoid the exception for low-resolution images (default is True).
        align (bool): Flag to enable face alignment (default is True).
        l2_normalize (bool): Flag to enable L2 normalization (unit vector normalization)
            (default is False).
        expand_percentage (int): expand detected facial area with a percentage (default is 0).
        normalization (string): Normalize the input image before feeding it to the model.
            Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace
            (default is base).
        anti_spoofing (boolean): Flag to enable anti spoofing (default is False).
        database_type (str): Type of database to register identities. Options: 'postgres',
            'mongo', 'weaviate', 'neo4j', 'pgvector', 'pinecone' (default is 'postgres').
        connection_details (dict or str): Connection details for the database.
        connection (Any): Existing database connection object. If provided, this connection
            will be used instead of creating a new one, and it is left open on return.
    Note:
        Instead of providing `connection` or `connection_details`, database connection
        information can be supplied via environment variables:
        - DEEPFACE_POSTGRES_URI
        - DEEPFACE_MONGO_URI
        - DEEPFACE_WEAVIATE_URI
        - DEEPFACE_NEO4J_URI
        - DEEPFACE_PINECONE_API_KEY
    Returns:
        result (dict): A dictionary containing registration results with following keys.
            - inserted (int): Number of embeddings successfully registered to the database.
    Raises:
        ValueError: if the database type is unsupported or no embedding could be extracted.
    """
    db_client = __connect_database(
        database_type=database_type,
        connection_details=connection_details,
        connection=connection,
    )

    try:
        results = __get_embeddings(
            img=img,
            model_name=model_name,
            detector_backend=detector_backend,
            enforce_detection=enforce_detection,
            align=align,
            anti_spoofing=anti_spoofing,
            expand_percentage=expand_percentage,
            normalization=normalization,
            l2_normalize=l2_normalize,
            return_face=True,
        )

        # Only the last few characters matter for the extension test, so avoid
        # lower-casing a potentially huge base64 payload.
        img_is_image_path = isinstance(img, str) and img[-5:].lower().endswith(
            (".jpg", ".jpeg", ".png")
        )

        embedding_records: List[Dict[str, Any]] = []
        for result in results:
            if img_name:
                img_identifier = img_name
            elif img_is_image_path:
                img_identifier = img
            else:
                # a fresh identifier per detected face
                img_identifier = str(uuid.uuid4())

            embedding_records.append(
                {
                    "id": None,
                    "img_name": img_identifier,
                    "face": result["face"],
                    "model_name": model_name,
                    "detector_backend": detector_backend,
                    "embedding": result["embedding"],
                    "aligned": align,
                    "l2_normalized": l2_normalize,
                }
            )

        inserted = db_client.insert_embeddings(embedding_records, batch_size=100)
        logger.debug(f"Successfully registered {inserted} embeddings to the database.")
        return {"inserted": inserted}
    finally:
        # Close the database connection if it was created internally, even when
        # embedding extraction or insertion failed.
        if connection is None:
            db_client.close()
def search(
    img: Union[str, NDArray[Any], IO[bytes], List[str], List[NDArray[Any]], List[IO[bytes]]],
    model_name: str = "VGG-Face",
    detector_backend: str = "opencv",
    distance_metric: str = "cosine",
    enforce_detection: bool = True,
    align: bool = True,
    l2_normalize: bool = False,
    expand_percentage: int = 0,
    normalization: str = "base",
    anti_spoofing: bool = False,
    similarity_search: bool = False,
    k: Optional[int] = None,
    database_type: str = "postgres",
    connection_details: Optional[Union[Dict[str, Any], str]] = None,
    connection: Any = None,
    search_method: str = "exact",
) -> List[pd.DataFrame]:
    """
    Search for identities in database for face recognition. This is a stateless facial
    recognition function. Use find function to do it in a stateful way.
    Args:
        img (str or np.ndarray or IO[bytes] or list): The exact path to the image, a numpy array
            in BGR format, a file object that supports at least `.read` and is opened in binary
            mode, or a base64 encoded image. If a list is provided, each element should be a
            string or numpy array representing an image, and the function will process images
            in batch.
        model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
            OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet
            (default is VGG-Face).
        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8n', 'yolov8m', 'yolov8l', 'yolov11n',
            'yolov11s', 'yolov11m', 'yolov11l', 'yolov12n', 'yolov12s', 'yolov12m', 'yolov12l',
            'centerface' or 'skip' (default is opencv).
        distance_metric (string): Metric for measuring similarity. Options: 'cosine',
            'euclidean', 'angular' (default is cosine). For exact search with
            l2_normalize=True, 'euclidean' is switched to 'euclidean_l2'. For ann search the
            metric is overwritten: cosine if l2_normalize is True, euclidean otherwise.
        enforce_detection (boolean): If no face is detected in an image, raise an exception.
            Set to False to avoid the exception for low-resolution images (default is True).
        align (bool): Flag to enable face alignment (default is True).
        l2_normalize (bool): Flag to enable L2 normalization (unit vector normalization)
            (default is False).
        expand_percentage (int): expand detected facial area with a percentage (default is 0).
        normalization (string): Normalize the input image before feeding it to the model.
            Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace
            (default is base).
        anti_spoofing (boolean): Flag to enable anti spoofing (default is False).
        similarity_search (boolean): If False, performs identity verification and returns only
            entries whose distance is within the threshold. If True, performs similarity search
            and returns the closest faces regardless of the threshold
            (e.g., celebrity or parental look-alikes). Default is False.
        k (int): Number of top similar faces to retrieve from the database for each detected
            face. If not specified, exact search returns every candidate that passes the
            filter and ann search inspects the 20 nearest neighbours (default is None).
        database_type (str): Type of database to search identities. Options: 'postgres',
            'mongo', 'weaviate', 'neo4j', 'pgvector', 'pinecone' (default is 'postgres').
        connection_details (dict or str): Connection details for the database.
        connection (Any): Existing database connection object. If provided, this connection
            will be used instead of creating a new one, and it is left open on return.
        search_method (str): Method to use for searching identities. Options: 'exact', 'ann'
            (default is 'exact'). To use ann search on a non-vector database, you must run
            build_index function first to create the index.
    Note:
        Instead of providing `connection` or `connection_details`, database connection
        information can be supplied via environment variables:
        - DEEPFACE_POSTGRES_URI
        - DEEPFACE_MONGO_URI
        - DEEPFACE_WEAVIATE_URI
        - DEEPFACE_NEO4J_URI
        - DEEPFACE_PINECONE_API_KEY
    Returns:
        results (List[pd.DataFrame]):
            A list of pandas dataframes. Each dataframe corresponds to one face detected in
            the source image. For ann search, faces without any retained match are omitted
            from the list. The DataFrame columns include:
            - id: ID of the matched database entry.
            - img_name: Name of the image file in the database.
            - model_name: Name of the model used for recognition.
            - aligned: Whether face alignment was performed.
            - l2_normalized: Whether L2 normalization was applied.
            - search_method: Method used for searching identities: exact or ann.
            - target_x, target_y, target_w, target_h: Bounding box coordinates taken from
                the `facial_area` of the face detected in the given img (None if absent).
            - threshold: threshold to determine a pair whether same person or different
                persons
            - confidence: Confidence score indicating the likelihood that the images
                represent the same person. The score is between 0 and 100, where higher
                values indicate greater confidence in the verification result.
            - distance_metric: Distance metric actually used for similarity measurement.
            - distance: Distance between the faces based on the specified model
                and distance metric
    Raises:
        ValueError: for an unsupported search method, database type or distance metric, when
            faiss is required but not installed, or when the database holds no embeddings for
            the requested configuration.
    """
    # fail fast, before connecting to the database or running the model
    if search_method not in ("exact", "ann"):
        raise ValueError(f"Unsupported search method: {search_method}")

    # adjust distance metric
    if search_method == "ann":
        # ann does cosine for l2 normalized vectors, euclidean for non-l2 normalized vectors
        new_distance_metric = "cosine" if l2_normalize is True else "euclidean"
        if new_distance_metric != distance_metric:
            logger.warn(
                f"Overwriting distance_metric to '{new_distance_metric}' since "
                f"{'vectors are L2-norm' if l2_normalize else 'vectors are not L2-norm'}."
            )
            distance_metric = new_distance_metric
    elif l2_normalize is True and distance_metric == "euclidean":
        # exact search on unit vectors: use the euclidean_l2 threshold
        logger.warn(
            "Overwriting distance_metric to 'euclidean_l2' since vectors are L2 normalized."
        )
        distance_metric = "euclidean_l2"

    threshold = find_threshold(model_name=model_name, distance_metric=distance_metric)

    db_client = __connect_database(
        database_type=database_type,
        connection_details=connection_details,
        connection=connection,
    )

    try:
        results = __get_embeddings(
            img=img,
            model_name=model_name,
            detector_backend=detector_backend,
            enforce_detection=enforce_detection,
            align=align,
            anti_spoofing=anti_spoofing,
            expand_percentage=expand_percentage,
            normalization=normalization,
            l2_normalize=l2_normalize,
            return_face=False,
        )

        is_vector_db = database_inventory[database_type]["is_vector_db"]
        dfs: List[pd.DataFrame] = []

        if search_method == "exact":
            source_embeddings = db_client.fetch_all_embeddings(
                model_name=model_name,
                detector_backend=detector_backend,
                aligned=align,
                l2_normalized=l2_normalize,
            )
            if not source_embeddings:
                raise ValueError(
                    "No embeddings found in the database for the criteria "
                    f"{model_name=}, {detector_backend=}, {align=}, {l2_normalize=}."
                    "You must call register some embeddings to the database before using search."
                )

            distance_functions = {
                "cosine": find_cosine_distance,
                "euclidean": find_euclidean_distance,
                "angular": find_angular_distance,
                "euclidean_l2": lambda source, target: find_euclidean_distance(
                    find_l2_normalize(source), find_l2_normalize(target)
                ),
            }
            find_distance = distance_functions.get(distance_metric)
            if find_distance is None:
                raise ValueError(f"Unsupported distance metric: {distance_metric}")

            # loop-invariant: the stored embeddings and their metadata
            source_df = pd.DataFrame(source_embeddings)
            stored_vectors = source_df["embedding"].tolist()
            metadata_df = source_df.drop(columns=["embedding"])

            for result in results:
                target_embedding = cast(List[float], result["embedding"])
                facial_area = result.get("facial_area", {})

                df = metadata_df.copy()
                df["search_method"] = search_method
                df["target_x"] = facial_area.get("x", None)
                df["target_y"] = facial_area.get("y", None)
                df["target_w"] = facial_area.get("w", None)
                df["target_h"] = facial_area.get("h", None)
                df["threshold"] = threshold
                df["distance_metric"] = distance_metric
                df["distance"] = [
                    find_distance(stored_vector, target_embedding)
                    for stored_vector in stored_vectors
                ]
                df["confidence"] = [
                    find_confidence(
                        distance=distance,
                        model_name=model_name,
                        distance_metric=distance_metric,
                        verified=bool(distance <= threshold),
                    )
                    for distance in df["distance"]
                ]

                if similarity_search is False:
                    df = df[df["distance"] <= threshold]
                if k is not None and k > 0:
                    df = df.nsmallest(k, "distance")
                df = df.sort_values(by="distance", ascending=True).reset_index(drop=True)
                dfs.append(df)
            return dfs

        if is_vector_db is True:
            # the vector database performs the nearest neighbour search itself
            for result in results:
                target_vector: List[float] = result["embedding"]
                neighbours = db_client.search_by_vector(
                    vector=target_vector,
                    model_name=model_name,
                    detector_backend=detector_backend,
                    aligned=align,
                    l2_normalized=l2_normalize,
                    limit=k or 20,
                )
                if not neighbours:
                    raise ValueError(
                        "No embeddings found in the database for the criteria "
                        f"{model_name=}, {detector_backend=}, {align=}, {l2_normalize=}."
                        "You must call register some embeddings to the database before using search."
                    )

                facial_area = result.get("facial_area", {})
                shared_fields = {
                    "model_name": model_name,
                    "detector_backend": detector_backend,
                    "aligned": align,
                    "l2_normalized": l2_normalize,
                    "search_method": search_method,
                    "target_x": facial_area.get("x", None),
                    "target_y": facial_area.get("y", None),
                    "target_w": facial_area.get("w", None),
                    "target_h": facial_area.get("h", None),
                    "threshold": threshold,
                    "distance_metric": distance_metric,
                }

                instances = []
                for neighbour in neighbours:
                    distance = neighbour["distance"]
                    verified = bool(distance <= threshold)
                    # verification keeps matches only; similarity search keeps everything
                    if similarity_search is False and not verified:
                        continue
                    instances.append(
                        {
                            "id": neighbour["id"],
                            "img_name": neighbour["img_name"],
                            **shared_fields,
                            "distance": distance,
                            "confidence": find_confidence(
                                distance=distance,
                                model_name=model_name,
                                distance_metric=distance_metric,
                                verified=verified,
                            ),
                        }
                    )

                if not instances:
                    continue
                df = pd.DataFrame(instances)
                df = df.sort_values(by="distance", ascending=True).reset_index(drop=True)
                if k is not None and k > 0:
                    df = df.nsmallest(k, "distance")
                dfs.append(df)
            return dfs

        # ann search on a non-vector database: use the faiss index stored by build_index
        try:
            import faiss
        except ImportError as e:
            raise ValueError(
                "faiss is not installed. Please install faiss to use approximate nearest neighbour."
            ) from e

        embeddings_index_bytes = db_client.get_embeddings_index(
            model_name=model_name,
            detector_backend=detector_backend,
            aligned=align,
            l2_normalized=l2_normalize,
        )
        embeddings_index_buffer = np.frombuffer(embeddings_index_bytes, dtype=np.uint8)
        embeddings_index = faiss.deserialize_index(embeddings_index_buffer)
        logger.info("Loaded embeddings index from database.")

        for result in results:
            query_vector = np.array(result["embedding"], dtype="float32").reshape(1, -1)
            distances, indices = embeddings_index.search(query_vector, k or 20)

            facial_area = result.get("facial_area", {})
            shared_fields = {
                "model_name": model_name,
                "detector_backend": detector_backend,
                "aligned": align,
                "l2_normalized": l2_normalize,
                "search_method": search_method,
                "target_x": facial_area.get("x", None),
                "target_y": facial_area.get("y", None),
                "target_w": facial_area.get("w", None),
                "target_h": facial_area.get("h", None),
                "threshold": threshold,
                "distance_metric": distance_metric,
            }

            instances = []
            for raw_distance, index in zip(distances[0], indices[0]):
                # faiss pads the result with label -1 when the index holds fewer
                # vectors than requested; those are not real neighbours.
                if index < 0:
                    continue
                # The index reports squared L2 distances: take the root for euclidean;
                # for unit vectors ||a - b||^2 = 2 * (1 - cos), so halve it for cosine.
                if distance_metric == "euclidean":
                    distance = math.sqrt(max(float(raw_distance), 0.0))
                else:
                    distance = raw_distance / 2
                verified = bool(distance <= threshold)
                # verification keeps matches only; similarity search keeps everything
                if similarity_search is False and not verified:
                    continue
                instances.append(
                    {
                        "id": int(index),
                        # img_name is fetched from the database below
                        **shared_fields,
                        "distance": distance,
                        "confidence": find_confidence(
                            distance=distance,
                            model_name=model_name,
                            distance_metric=distance_metric,
                            verified=verified,
                        ),
                    }
                )

            if not instances:
                continue
            df = pd.DataFrame(instances)
            df = df.sort_values(by="distance", ascending=True).reset_index(drop=True)
            if k is not None and k > 0:
                df = df.nsmallest(k, "distance")
            # the index only stores ids; query the database for the image names
            id_mappings = db_client.search_by_id(ids=df["id"].tolist())
            ids_df = pd.DataFrame(id_mappings, columns=["id", "img_name"])
            df = df.merge(ids_df, on="id", how="left")
            dfs.append(df)
        return dfs
    finally:
        # Close the database connection if it was created internally
        if connection is None:
            db_client.close()
def build_index(
    model_name: str = "VGG-Face",
    detector_backend: str = "opencv",
    align: bool = True,
    l2_normalize: bool = False,
    database_type: str = "postgres",
    connection_details: Optional[Union[Dict[str, Any], str]] = None,
    connection: Any = None,
    batch_size: int = 1000,
    max_neighbors_per_node: int = 32,
) -> None:
    """
    Build index for faster search in the database. You should set search_method to 'ann'
    in the search function to use the built index. If an index already exists for the given
    configuration, only embeddings that are not yet part of it are added.
    Args:
        model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
            OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet
            (default is VGG-Face).
        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8n', 'yolov8m', 'yolov8l', 'yolov11n',
            'yolov11s', 'yolov11m', 'yolov11l', 'yolov12n', 'yolov12s', 'yolov12m', 'yolov12l',
            'centerface' or 'skip' (default is opencv).
        align (bool): Flag to enable face alignment (default is True).
        l2_normalize (bool): Flag to enable L2 normalization (unit vector normalization)
            (default is False).
        database_type (str): Type of database to build index. Options: 'postgres', 'mongo',
            'weaviate', 'neo4j', 'pgvector', 'pinecone' (default is 'postgres').
        connection_details (dict or str): Connection details for the database.
        connection (Any): Existing database connection object. If provided, this connection
            will be used instead of creating a new one, and it is left open on return.
        batch_size (int): Number of embeddings added to the index per call
            (default is 1000). Must be positive.
        max_neighbors_per_node (int): Maximum number of neighbors per node in the index
            (default is 32).
    Note:
        Instead of providing `connection` or `connection_details`, database connection
        information can be supplied via environment variables:
        - DEEPFACE_POSTGRES_URI
        - DEEPFACE_MONGO_URI
        - DEEPFACE_WEAVIATE_URI
        - DEEPFACE_NEO4J_URI
        - DEEPFACE_PINECONE_API_KEY
    Raises:
        ValueError: for an unsupported database type, a non-positive batch_size, when faiss
            is not installed, or when no embeddings exist for the given configuration.
    """
    if database_inventory.get(database_type) is None:
        raise ValueError(f"Unsupported database type: {database_type}")

    is_vector_db = database_inventory[database_type]["is_vector_db"]
    if is_vector_db is True:
        logger.info(f"{database_type} manages its own indexes. No need to build index manually.")
        return

    # range() with a zero step raises and a negative step silently indexes nothing
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}.")

    try:
        import faiss
    except ImportError as e:
        raise ValueError("faiss is not installed. Please install faiss to use build_index.") from e

    db_client = __connect_database(
        database_type=database_type,
        connection_details=connection_details,
        connection=connection,
    )

    try:
        index = __get_index(
            db_client=db_client,
            model_name=model_name,
            detector_backend=detector_backend,
            align=align,
            l2_normalize=l2_normalize,
        )

        if index is not None:
            indexed_indices = faiss.vector_to_array(index.id_map)
            indexed_embeddings = set(indexed_indices)
            logger.info(
                f"Found {len(indexed_embeddings)} embeddings already indexed in the database."
            )
        else:
            logger.info("No existing index found in the database. A new index will be created.")
            indexed_embeddings = set()

        tic = time.time()
        source_embeddings = db_client.fetch_all_embeddings(
            model_name=model_name,
            detector_backend=detector_backend,
            aligned=align,
            l2_normalized=l2_normalize,
        )
        toc = time.time()

        if not source_embeddings:
            raise ValueError(
                "No embeddings found in the database for the criteria "
                f"{model_name=}, {detector_backend=}, {align=}, {l2_normalize=}."
                "You must call register some embeddings to the database before using build_index."
            )

        logger.info(
            f"Fetched {len(source_embeddings)} embeddings from database in {toc - tic:.2f} seconds."
        )

        unindexed_source_embeddings = [
            item for item in source_embeddings if item["id"] not in indexed_embeddings
        ]

        if not unindexed_source_embeddings:
            logger.info("All embeddings are already indexed. No new embeddings to index.")
            return

        ids = [item["id"] for item in unindexed_source_embeddings]
        vectors = np.array(
            [item["embedding"] for item in unindexed_source_embeddings], dtype="float32"
        )

        if index is None:
            embedding_dim_size = len(source_embeddings[0]["embedding"])
            base_index = faiss.IndexHNSWFlat(embedding_dim_size, max_neighbors_per_node)
            # wrap the index so vectors are retrievable by their database ids
            index = faiss.IndexIDMap(base_index)

        tic = time.time()
        for i in range(0, len(vectors), batch_size):
            batch_ids = np.array(ids[i : i + batch_size], dtype="int64")
            batch_vectors = vectors[i : i + batch_size]
            index.add_with_ids(batch_vectors, batch_ids)
        toc = time.time()
        logger.info(f"Added {len(vectors)} embeddings to index in {toc - tic:.2f} seconds.")

        # Serialize through a private temporary directory: the path is unique per call,
        # so concurrent builds cannot clobber each other, and the file is removed even
        # when writing or reading fails.
        with tempfile.TemporaryDirectory() as tmp_dir:
            index_path = os.path.join(tmp_dir, "index.faiss")

            tic = time.time()
            faiss.write_index(index, index_path)
            toc = time.time()
            logger.info(f"Saved index to disk in {toc - tic:.2f} seconds")

            with open(index_path, "rb") as f:
                index_data = f.read()

        tic = time.time()
        db_client.upsert_embeddings_index(
            model_name=model_name,
            detector_backend=detector_backend,
            aligned=align,
            l2_normalized=l2_normalize,
            index_data=index_data,
        )
        toc = time.time()
        logger.info(f"Upserted index to database in {toc - tic:.2f} seconds.")
    finally:
        # Close the database connection if it was created internally
        if connection is None:
            db_client.close()
def __get_embeddings(
    img: Union[str, NDArray[Any], IO[bytes], List[str], List[NDArray[Any]], List[IO[bytes]]],
    model_name: str = "VGG-Face",
    detector_backend: str = "opencv",
    enforce_detection: bool = True,
    align: bool = True,
    l2_normalize: bool = False,
    expand_percentage: int = 0,
    normalization: str = "base",
    anti_spoofing: bool = False,
    return_face: bool = True,
) -> List[Dict[str, Any]]:
    """
    Run `represent` on the given image(s) and flatten its output to one entry per face.
    Args:
        img (str or np.ndarray or IO[bytes] or list): The exact path to the image, a numpy array
            in BGR format, a file object that supports at least `.read` and is opened in binary
            mode, or a base64 encoded image. A list of such items is processed in batch.
        model_name (str): Model for face recognition (default is VGG-Face).
        detector_backend (string): face detector backend (default is opencv).
        enforce_detection (boolean): If no face is detected in an image, raise an exception
            (default is True).
        align (bool): Flag to enable face alignment (default is True).
        l2_normalize (bool): Flag to enable L2 normalization (default is False).
        expand_percentage (int): expand detected facial area with a percentage (default is 0).
        normalization (string): Normalize the input image before feeding it to the model
            (default is base).
        anti_spoofing (boolean): Flag to enable anti spoofing (default is False).
        return_face (bool): Whether to return the face along with the embedding
            (default is True).
    Returns:
        results (List[Dict]): A flat list with one dictionary per detected face. Items that
            `represent` returns as dicts are kept as they are; items returned as lists are
            unpacked into the output.
    Raises:
        ValueError: if `represent` returns nothing.
    """
    representations = represent(
        img_path=img,
        model_name=model_name,
        detector_backend=detector_backend,
        enforce_detection=enforce_detection,
        align=align,
        anti_spoofing=anti_spoofing,
        expand_percentage=expand_percentage,
        normalization=normalization,
        l2_normalize=l2_normalize,
        return_face=return_face,
    )

    if len(representations) == 0:
        raise ValueError("No embeddings were detected in the provided image(s).")

    # batch inputs produce one list per image; single inputs produce plain dicts
    flattened: List[Dict[str, Any]] = []
    for entry in representations:
        if isinstance(entry, list):
            flattened.extend(entry)
        elif isinstance(entry, dict):
            flattened.append(entry)
    return flattened
def __connect_database(
    database_type: str = "postgres",
    connection_details: Optional[Union[Dict[str, Any], str]] = None,
    connection: Any = None,
) -> Database:
    """
    Instantiate the client registered in `database_inventory` for the given database type
    Args:
        database_type (str): Type of database to connect. Options: 'postgres', 'mongo',
            'weaviate', 'neo4j', 'pgvector', 'pinecone' (default is 'postgres').
        connection_details (dict or str): Connection details for the database.
        connection (Any): Existing database connection object, handed to the client as is.
    Note:
        Instead of providing `connection` or `connection_details`, database connection
        information can be supplied via environment variables:
        - DEEPFACE_POSTGRES_URI
        - DEEPFACE_MONGO_URI
        - DEEPFACE_WEAVIATE_URI
        - DEEPFACE_NEO4J_URI
        - DEEPFACE_PINECONE_API_KEY
    Returns:
        db_client (Database): An instance of the database client.
    Raises:
        ValueError: if the database type is not present in the inventory.
    """
    inventory_entry = database_inventory.get(database_type)
    if inventory_entry is None:
        raise ValueError(f"Unsupported database type: {database_type}")

    make_client = inventory_entry["client"]
    return make_client(connection_details=connection_details, connection=connection)
def __get_index(
    db_client: Database,
    model_name: str,
    detector_backend: str,
    align: bool,
    l2_normalize: bool,
) -> Any:
    """
    Load and deserialize the stored faiss index for the given configuration
    Args:
        db_client (Database): An instance of the connected database client.
        model_name (str): Model for face recognition.
        detector_backend (string): face detector backend.
        align (bool): Flag to enable face alignment.
        l2_normalize (bool): Flag to enable L2 normalization (unit vector normalization)
    Returns:
        embeddings_index (Any): The deserialized embeddings index object, or None when
            fetching or deserializing the index raises a ValueError.
    """
    import faiss

    try:
        serialized_index = db_client.get_embeddings_index(
            model_name=model_name,
            detector_backend=detector_backend,
            aligned=align,
            l2_normalized=l2_normalize,
        )
        # faiss deserializes from a uint8 view over the stored bytes
        return faiss.deserialize_index(np.frombuffer(serialized_index, dtype=np.uint8))
    except ValueError:
        return None