anonymizeface / utils /extractor.py
JoelWester's picture
Upload 200 files
53bfa33 verified
import argparse
import math
import os
from enum import IntEnum
from pathlib import Path
import cv2
import face_alignment
import numpy as np
import numpy.linalg as npla
from PIL import Image
from diffusers.utils import load_image
# Reference landmark positions in the normalized, aligned face space (all
# coordinates lie in [0, 1]).  get_transform_mat() fits the detected landmarks
# with indices 17..48 and 54 onto these 33 rows, in this order; the trailing
# comment on each row is the landmark index it corresponds to.
# NOTE(review): indices presumably follow the common 68-point face landmark
# layout -- confirm against the landmark detector in use.
landmarks_2D_new = np.array(
[
[0.000213256, 0.106454], # 17
[0.0752622, 0.038915], # 18
[0.18113, 0.0187482], # 19
[0.29077, 0.0344891], # 20
[0.393397, 0.0773906], # 21
[0.586856, 0.0773906], # 22
[0.689483, 0.0344891], # 23
[0.799124, 0.0187482], # 24
[0.904991, 0.038915], # 25
[0.98004, 0.106454], # 26
[0.490127, 0.203352], # 27
[0.490127, 0.307009], # 28
[0.490127, 0.409805], # 29
[0.490127, 0.515625], # 30
[0.36688, 0.587326], # 31
[0.426036, 0.609345], # 32
[0.490127, 0.628106], # 33
[0.554217, 0.609345], # 34
[0.613373, 0.587326], # 35
[0.121737, 0.216423], # 36
[0.187122, 0.178758], # 37
[0.265825, 0.179852], # 38
[0.334606, 0.231733], # 39
[0.260918, 0.245099], # 40
[0.182743, 0.244077], # 41
[0.645647, 0.231733], # 42
[0.714428, 0.179852], # 43
[0.793132, 0.178758], # 44
[0.858516, 0.216423], # 45
[0.79751, 0.244077], # 46
[0.719335, 0.245099], # 47
[0.254149, 0.780233], # 48
[0.726104, 0.780233], # 54
],
dtype=np.float32,
)
class FaceType(IntEnum):
    """Kinds of face crop that can be extracted.

    Members are integer-valued so they can be compared and ordered; the
    values are enumerated in the order "next contains prev".
    """

    # enumerating in order "next contains prev"
    HALF = 0
    MID_FULL = 1
    FULL = 2
    FULL_NO_ALIGN = 3
    WHOLE_FACE = 4
    WHOLE_FACE_NO_ALIGN = 5
    HEAD = 10
    HEAD_NO_ALIGN = 20
    # Plain int on purpose: the previous ``(100,)`` only worked because Enum
    # unpacks a tuple value into ``int(*args)``.
    MARK_ONLY = 100  # no align at all, just embedded faceinfo

    @staticmethod
    def fromString(s):
        """Return the FaceType whose string name is ``s`` (case-insensitive).

        The lookup goes through the module-level ``from_string_dict``.

        Raises
        ------
        ValueError
            If ``s`` does not name a known face type.
        """
        r = from_string_dict.get(s.lower())
        if r is None:
            # ValueError is still caught by callers using ``except Exception``.
            raise ValueError("FaceType.fromString value error")
        return r

    @staticmethod
    def toString(face_type):
        """Return the string name of ``face_type`` from ``to_string_dict``.

        Raises
        ------
        KeyError
            If ``face_type`` has no entry in ``to_string_dict``.
        """
        return to_string_dict[face_type]
# String name of every FaceType member; read by FaceType.toString().
to_string_dict = {
    FaceType.HALF: "half_face",
    FaceType.MID_FULL: "midfull_face",
    FaceType.FULL: "full_face",
    FaceType.FULL_NO_ALIGN: "full_face_no_align",
    FaceType.WHOLE_FACE: "whole_face",
    FaceType.WHOLE_FACE_NO_ALIGN: "whole_face_no_align",
    FaceType.HEAD: "head",
    FaceType.HEAD_NO_ALIGN: "head_no_align",
    FaceType.MARK_ONLY: "mark_only",
}
# Inverse mapping (string name -> FaceType); read by FaceType.fromString().
from_string_dict = {name: face_type for face_type, name in to_string_dict.items()}
# Per-face-type crop settings as (padding, remove_align) pairs, unpacked by
# get_transform_mat():
#   padding      - enlarges the sampled square around the aligned face
#                  (enters the half-diagonal as padding * sqrt(2) + 0.5).
#   remove_align - when True, the crop is centered on the face but kept
#                  axis-aligned with the source image instead of rotated.
# FaceType.MARK_ONLY intentionally or not has no entry in this table.
FaceType_to_padding_remove_align = {
FaceType.HALF: (0.0, False),
FaceType.MID_FULL: (0.0675, False),
FaceType.FULL: (0.2109375, False),
FaceType.FULL_NO_ALIGN: (0.2109375, True),
FaceType.WHOLE_FACE: (0.40, False),
FaceType.WHOLE_FACE_NO_ALIGN: (0.40, True),
FaceType.HEAD: (0.70, False),
FaceType.HEAD_NO_ALIGN: (0.70, True),
}
def umeyama(src, dst, estimate_scale):
    """Fit an N-D similarity transform (rotation, translation, optional scale).

    Parameters
    ----------
    src : (M, N) array
        Source coordinates.
    dst : (M, N) array
        Destination coordinates.
    estimate_scale : bool
        When true a uniform scale factor is estimated as well; otherwise the
        scale is fixed to 1.

    Returns
    -------
    T : (N + 1, N + 1) array
        Homogeneous matrix mapping ``src`` onto ``dst``.  Every entry is NaN
        when the covariance of the two point sets has rank 0.

    References
    ----------
    .. [1] "Least-squares estimation of transformation parameters between two
           point patterns", Shinji Umeyama, PAMI 1991, DOI: 10.1109/34.88573
    """
    n_points, n_dims = src.shape[0], src.shape[1]

    # Center both point sets on their centroids.
    src_centroid = src.mean(axis=0)
    dst_centroid = dst.mean(axis=0)
    src_centered = src - src_centroid
    dst_centered = dst - dst_centroid

    # Eq. (38): cross-covariance of the centered sets.
    cov = np.dot(dst_centered.T, src_centered) / n_points

    # Eq. (39): sign vector, last entry flipped for a reflecting covariance.
    signs = np.ones((n_dims,), dtype=np.double)
    if np.linalg.det(cov) < 0:
        signs[n_dims - 1] = -1

    transform = np.eye(n_dims + 1, dtype=np.double)
    # View on the linear (rotation/scale) part; writes go into ``transform``.
    linear = transform[:n_dims, :n_dims]

    left, singular, right = np.linalg.svd(cov)

    # Eq. (40) and (43).
    rank = np.linalg.matrix_rank(cov)
    if rank == 0:
        return np.nan * transform

    if rank != n_dims - 1:
        linear[...] = np.dot(left, np.dot(np.diag(signs), right))
    elif np.linalg.det(left) * np.linalg.det(right) > 0:
        linear[...] = np.dot(left, right)
    else:
        # Rank-deficient and reflecting: force the last sign to -1 for the
        # rotation only; ``signs`` itself stays untouched for the scale below.
        forced = signs.copy()
        forced[n_dims - 1] = -1
        linear[...] = np.dot(left, np.dot(np.diag(forced), right))

    # Eq. (41) and (42).
    scale = (
        1.0 / src_centered.var(axis=0).sum() * np.dot(singular, signs)
        if estimate_scale
        else 1.0
    )

    # Translation is computed with the unscaled rotation, then the rotation
    # block is scaled in place.
    transform[:n_dims, n_dims] = dst_centroid - scale * np.dot(linear, src_centroid.T)
    linear *= scale
    return transform
def transform_points(points, mat, invert=False):
    """Map 2D points through an affine matrix using OpenCV.

    Parameters
    ----------
    points : array-like
        Points to transform, one per row.
    mat : array
        Affine matrix accepted by ``cv2.transform`` /
        ``cv2.invertAffineTransform``.
    invert : bool, optional
        When true, the inverse of ``mat`` is applied instead.

    Returns
    -------
    array
        The transformed points with every length-1 axis squeezed out (so a
        single input point comes back without its leading axis).
    """
    affine = cv2.invertAffineTransform(mat) if invert else mat
    # cv2.transform works on a multi-channel array, hence the extra axis.
    stacked = np.expand_dims(points, axis=1)
    # NOTE(review): the third positional argument of cv2.transform is ``dst``;
    # the shape tuple is passed exactly as the original call did -- confirm
    # whether it is intentional.
    moved = cv2.transform(stacked, affine, stacked.shape)
    return np.squeeze(moved)
def estimate_averaged_yaw(landmarks):
    """Return a yaw indicator from horizontal landmark asymmetry.

    Three x-distances from landmarks 0, 1, 2 to landmarks 27, 28, 29 are
    averaged, likewise three from landmarks 27, 28, 29 to landmarks 16, 15,
    14; the result is the second average minus the first, as a Python float.
    Works much better than solvePnP if landmarks are from "3DFAN".

    Parameters
    ----------
    landmarks : array-like
        Landmark coordinates with x in column 0; rows up to index 29 are read.
    """
    pts = landmarks if isinstance(landmarks, np.ndarray) else np.array(landmarks)
    xs = pts[:, 0]

    # Mean x-span on each side of landmarks 27..29.
    span_before = ((xs[27] - xs[0]) + (xs[28] - xs[1]) + (xs[29] - xs[2])) / 3.0
    span_after = ((xs[16] - xs[27]) + (xs[15] - xs[28]) + (xs[14] - xs[29])) / 3.0
    return float(span_after - span_before)
def polygon_area(x, y):
    """Return the unsigned area of a polygon via the shoelace formula.

    ``x`` and ``y`` hold the vertex coordinates in traversal order.
    """
    cross_xy = np.dot(x, np.roll(y, 1))
    cross_yx = np.dot(y, np.roll(x, 1))
    return 0.5 * np.abs(cross_xy - cross_yx)
def get_transform_mat(image_landmarks, output_size, face_type, scale=1.0):
    """Build the affine matrix mapping image coordinates to a face crop.

    Parameters
    ----------
    image_landmarks : array-like
        Face landmarks in image (global) coordinates.  Rows 17..48 and 54 are
        fitted onto ``landmarks_2D_new``, so at least 55 rows are needed.
    output_size : int
        Side length of the square crop, in pixels.
    face_type : FaceType
        Selects the padding and whether the alignment rotation is removed
        (see ``FaceType_to_padding_remove_align``).  Types without an entry in
        that table fall back to zero padding with alignment kept.
    scale : float, optional
        The sampled region's half-diagonal is divided by this value.

    Returns
    -------
    mat : array
        The matrix returned by ``cv2.getAffineTransform`` for three corners of
        the sampled square and three corners of the output crop.
    """
    if not isinstance(image_landmarks, np.ndarray):
        image_landmarks = np.array(image_landmarks)

    # estimate landmarks transform from global space to local aligned space
    # with bounds [0..1]
    mat = umeyama(
        np.concatenate([image_landmarks[17:49], image_landmarks[54:55]]),
        landmarks_2D_new,
        True,
    )[0:2]

    # get corner points (and the center) of the unit square in global space
    g_p = transform_points(
        np.float32([(0, 0), (1, 0), (1, 1), (0, 1), (0.5, 0.5)]), mat, True
    )
    g_c = g_p[4]

    # calc diagonal unit vectors between corners in global space
    tb_diag_vec = (g_p[2] - g_p[0]).astype(np.float32)
    tb_diag_vec /= npla.norm(tb_diag_vec)
    bt_diag_vec = (g_p[1] - g_p[3]).astype(np.float32)
    bt_diag_vec /= npla.norm(bt_diag_vec)

    # calc modifier of diagonal vectors for scale and padding value.
    # The fallback must be a (padding, remove_align) pair: a scalar default
    # cannot be unpacked and raised TypeError for face types missing from the
    # table (e.g. MARK_ONLY).
    padding, remove_align = FaceType_to_padding_remove_align.get(
        face_type, (0.0, False)
    )
    mod = (1.0 / scale) * (
        npla.norm(g_p[0] - g_p[2]) * (padding * np.sqrt(2.0) + 0.5)
    )

    if face_type == FaceType.WHOLE_FACE:
        # adjust vertical offset for WHOLE_FACE, 7% below in order to cover
        # more forehead
        vec = (g_p[0] - g_p[3]).astype(np.float32)
        vec_len = npla.norm(vec)
        vec /= vec_len
        g_c += vec * vec_len * 0.07
    elif face_type == FaceType.HEAD:
        # assuming image_landmarks are 3D_Landmarks extracted for HEAD,
        # adjust horizontal offset according to estimated yaw
        yaw = estimate_averaged_yaw(transform_points(image_landmarks, mat, False))
        hvec = (g_p[0] - g_p[1]).astype(np.float32)
        hvec_len = npla.norm(hvec)
        hvec /= hvec_len
        yaw *= np.abs(math.tanh(yaw * 2))  # Damp near zero
        g_c -= hvec * (yaw * hvec_len / 2.0)

        # adjust vertical offset for HEAD, 50% below
        vvec = (g_p[0] - g_p[3]).astype(np.float32)
        vvec_len = npla.norm(vvec)
        vvec /= vvec_len
        g_c += vvec * vvec_len * 0.50

    # calc 3 points in global space to estimate 2d affine transform
    if not remove_align:
        l_t = np.array(
            [g_c - tb_diag_vec * mod, g_c + bt_diag_vec * mod, g_c + tb_diag_vec * mod]
        )
    else:
        # remove_align - face will be centered in the frame but not aligned
        l_t = np.array(
            [
                g_c - tb_diag_vec * mod,
                g_c + bt_diag_vec * mod,
                g_c + tb_diag_vec * mod,
                g_c - bt_diag_vec * mod,
            ]
        )
        # get area of face square in global space
        area = polygon_area(l_t[:, 0], l_t[:, 1])
        # calc side of square
        side = np.float32(math.sqrt(area) / 2)
        # calc 3 points with unrotated square
        l_t = np.array(
            [g_c + [-side, -side], g_c + [side, -side], g_c + [side, side]]
        )

    # calc affine transform from 3 global space points to 3 local space points
    # size of 'output_size'
    pts2 = np.float32(((0, 0), (output_size, 0), (output_size, output_size)))
    # cv2.getAffineTransform is fed float32 points on both sides
    l_t = l_t.astype(np.float32)
    mat = cv2.getAffineTransform(l_t, pts2)
    return mat
def extract_faces(model, image, face_image_size, face_type=FaceType.WHOLE_FACE):
    """Detect faces in ``image`` and return one square crop per face.

    Parameters
    ----------
    model : object
        Landmark detector exposing ``get_landmarks(array)``.
    image : PIL image or array-like
        Input image with at least three channels; only the first three are
        used.
    face_image_size : int
        Side length of each output crop, in pixels.
    face_type : FaceType, optional
        Crop variant passed on to ``get_transform_mat``.

    Returns
    -------
    face_images : list of PIL.Image.Image
        One crop per detected face; empty when no face was detected.
    image_to_face_matrices : list of array
        The image-to-crop affine matrix for each crop, in the same order.
    """
    # take the first three channels (R, G, B)
    array = np.array(image)[:, :, :3]
    preds = model.get_landmarks(array)

    face_images = []
    image_to_face_matrices = []
    if preds is None:
        # face_alignment reports "no face detected" as None rather than an
        # empty list; iterating over it would raise TypeError.
        return face_images, image_to_face_matrices

    crop_size = (face_image_size, face_image_size)
    for face_landmarks in preds:
        image_to_face_mat = get_transform_mat(
            face_landmarks, face_image_size, face_type
        )
        # The 4th positional parameter of cv2.warpAffine is ``dst``, so the
        # interpolation mode has to be passed by keyword to take effect.
        face_array = cv2.warpAffine(
            array,
            image_to_face_mat,
            crop_size,
            flags=cv2.INTER_LANCZOS4,
            borderValue=(255, 255, 255),  # pixels outside the source are white
        )
        image_to_face_matrices.append(image_to_face_mat)
        face_images.append(Image.fromarray(face_array))
    return face_images, image_to_face_matrices
def parse_args(argv=None):
    """Parse the command-line options of the face extractor.

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse; ``None`` (the default) makes argparse read
        ``sys.argv[1:]``.

    Returns
    -------
    argparse.Namespace
        ``image_path``, ``image_size``, ``output_folder`` and ``face_type``;
        ``face_type`` is already converted to a ``FaceType`` member.

    Raises
    ------
    ValueError
        If ``--face_type`` does not name a known face type.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--image_path",
        type=str,
        # An empty default only failed later inside the image loader; make
        # argparse report the missing option instead.
        required=True,
        help="Path to input image",
    )
    parser.add_argument(
        "--image_size",
        type=int,
        default=256,
        help="Output image size",
    )
    parser.add_argument(
        "--output_folder",
        type=str,
        default="./face_images",
        help="Path to output folder",
    )
    parser.add_argument(
        "--face_type",
        type=str,
        default="whole_face",
        help=(
            "Face type to extract (e.g., half_face, midfull_face, full_face, "
            "full_face_no_align, whole_face, whole_face_no_align, head, "
            "head_no_align, mark_only.)"
        ),
    )
    args = parser.parse_args(argv)

    # Convert face_type string to FaceType enum
    try:
        args.face_type = FaceType.fromString(args.face_type)
    except Exception as err:
        # The assignment above failed, so args.face_type is still the raw
        # string here; keep the original error as the cause.
        raise ValueError(f"Invalid face_type: {args.face_type}") from err
    return args
if __name__ == "__main__":
    # Script entry point: detect faces in one image and save each crop as PNG.
    cli = parse_args()

    # face_detector: "sfd" for SFD, "dlib" for Dlib and "folder" for existing
    # bounding boxes.
    landmark_model = face_alignment.FaceAlignment(
        face_alignment.LandmarksType.TWO_D, face_detector="sfd"
    )

    source_image = load_image(cli.image_path)
    crops, _crop_matrices = extract_faces(
        landmark_model, source_image, cli.image_size, cli.face_type
    )

    # Make sure the output folder exists
    os.makedirs(cli.output_folder, exist_ok=True)

    # Crops are named "<input stem>_<two-digit index>.png".
    stem = Path(cli.image_path).stem
    for index, crop in enumerate(crops):
        target = Path(cli.output_folder, f"{stem}_{index:02}.png")
        crop.save(target)