# Source: robinwitch -- "upload ckpt" (commit 872b1a7)
import mediapipe as mp
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import numpy as np
import cv2
def convert_bbox_to_square_bbox(bbox, max_h, max_w, scale=1.0):
    """Expand a ``[(x0, y0), (x1, y1)]`` bbox into a scaled square, clamped to the image.

    The square is centred on the original box and its side is
    ``max(width, height) * scale``.  Each corner is then clipped to the
    ``max_w`` x ``max_h`` image bounds, so a box near the border may end up
    slightly non-square.

    Returns:
        A flat ``[x0, y0, x1, y1]`` list of ints (note: flat, unlike the
        nested pair-of-points input format).
    """
    (x0, y0), (x1, y1) = bbox
    side = max(x1 - x0, y1 - y0) * scale
    half = side / 2
    cx = (x0 + x1) / 2
    cy = (y0 + y1) / 2
    # Truncate to int first (matching int() semantics), then clamp to bounds.
    left = max(0, int(cx - half))
    top = max(0, int(cy - half))
    right = min(max_w, int(cx + half))
    bottom = min(max_h, int(cy + half))
    return [left, top, right, bottom]
def draw_landmarks_on_image(rgb_image, detection_result):
    """Render MediaPipe face-mesh landmarks onto a copy of the input image.

    For every face in ``detection_result`` this draws the tesselation,
    contour, and iris connection sets using the default mediapipe styles.
    The input image is not modified; the annotated copy is returned.
    """
    annotated = np.copy(rgb_image)
    mesh = mp.solutions.face_mesh
    styles = mp.solutions.drawing_styles
    for face_landmarks in detection_result.face_landmarks:
        # Re-pack the task-API landmarks into the proto list expected by the
        # legacy drawing utilities.
        proto = landmark_pb2.NormalizedLandmarkList()
        proto.landmark.extend(
            landmark_pb2.NormalizedLandmark(x=lm.x, y=lm.y, z=lm.z)
            for lm in face_landmarks
        )
        # Draw each connection set with its matching default style.
        for connections, style in (
            (mesh.FACEMESH_TESSELATION, styles.get_default_face_mesh_tesselation_style()),
            (mesh.FACEMESH_CONTOURS, styles.get_default_face_mesh_contours_style()),
            (mesh.FACEMESH_IRISES, styles.get_default_face_mesh_iris_connections_style()),
        ):
            solutions.drawing_utils.draw_landmarks(
                image=annotated,
                landmark_list=proto,
                connections=connections,
                landmark_drawing_spec=None,
                connection_drawing_spec=style,
            )
    return annotated
class FaceDetector:
    """Wrapper around MediaPipe's FaceLandmarker in single-image mode.

    Given one RGB frame, it reports per detected face: a coarse yaw estimate
    ("left"/"right"/"forward"), the normalized 3D landmarks, face / mouth /
    eye bounding boxes, blendshape scores, facial transformation matrices,
    and (in the extended variant) region center points and iris masks.
    """

    # Lip contour indices from the MediaPipe FaceMesh topology, listed as
    # connection pairs (outer ring, then the two inner rings).  Duplicates
    # are harmless: the list is only used for min/max bbox computation and
    # membership tests when visualizing.
    _MOUTH_LANDMARKS = [
        61, 146, 146, 91, 91, 181, 181, 84, 84, 17, 17, 314, 314, 405,
        405, 321, 321, 375, 375, 291, 61, 185, 185, 40, 40, 39, 39, 37,
        37, 0, 0, 267, 267, 269, 269, 270, 270, 409, 409, 291, 78, 95,
        95, 88, 88, 178, 178, 87, 87, 14, 14, 317, 317, 402, 402, 318,
        318, 324, 324, 308, 78, 191, 191, 80, 80, 81, 81, 82, 82, 13,
        13, 312, 312, 311, 311, 310, 310, 415, 415, 308,
    ]
    # Nose region indices (tip, bridge and alae).
    _NOSE_LANDMARKS = [48, 115, 220, 45, 4, 275, 440, 344, 278]
    # Eye-ring indices.  "left"/"right" keep the original code's naming --
    # presumably the subject's left/right eye; confirm against callers.
    _LEFT_EYE_LANDMARKS = [362, 398, 384, 385, 386, 387, 388, 466, 263, 249, 390, 373, 374, 380, 381, 382]
    _RIGHT_EYE_LANDMARKS = [33, 246, 161, 160, 159, 158, 157, 173, 133, 155, 154, 153, 145, 144, 163, 7]

    def __init__(self, mediapipe_model_asset_path, delegate=1, face_detection_confidence=0.5, num_faces=5):
        """Create a FaceLandmarker instance in IMAGE running mode.

        Args:
            mediapipe_model_asset_path: path to the face_landmarker ``.task`` model file.
            delegate: value for ``mp.tasks.BaseOptions.delegate`` (default 1; see
                the mediapipe Delegate enum -- the GPU delegate was observed not
                to work inside docker, see TODO below).
            face_detection_confidence: threshold reused for detection, presence,
                and tracking confidence alike.
            num_faces: maximum number of faces to detect per image.
        """
        options = mp.tasks.vision.FaceLandmarkerOptions(
            base_options=mp.tasks.BaseOptions(
                model_asset_path=mediapipe_model_asset_path,
                # delegate=mp.tasks.BaseOptions.Delegate.GPU,
                # TODO: why does the gpu version not work in docker???
                delegate=delegate,
            ),
            running_mode=mp.tasks.vision.RunningMode.IMAGE,
            num_faces=num_faces,
            output_face_blendshapes=True,
            output_facial_transformation_matrixes=True,
            min_face_detection_confidence=face_detection_confidence,
            min_face_presence_confidence=face_detection_confidence,
            min_tracking_confidence=face_detection_confidence,
        )
        self.detector = mp.tasks.vision.FaceLandmarker.create_from_options(options)

    def _save_debug_visualization(self, image, mouth_bbox, eye_bbox, face_landmarks, w, h):
        """Draw mouth/eye boxes and landmark dots onto ``image`` in place and
        write the result to ``image_detect.png``.

        The channels are reversed on write -- presumably the input is RGB and
        cv2.imwrite expects BGR; confirm against callers.
        """
        x_min, y_min, x_max, y_max = mouth_bbox
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 0, 255), 2)
        for bbox in eye_bbox.values():
            x_min, y_min, x_max, y_max = bbox
            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 0, 255), 2)
        # Sets for O(1) membership instead of scanning 80-element lists for
        # every one of the ~478 landmarks.
        mouth_ids = set(self._MOUTH_LANDMARKS)
        eye_ids = set(self._LEFT_EYE_LANDMARKS) | set(self._RIGHT_EYE_LANDMARKS)
        for landmark_id, landmark in enumerate(face_landmarks):
            cx, cy = int(landmark.x * w), int(landmark.y * h)
            if landmark_id in mouth_ids:
                color = (0, 0, 255)
            elif landmark_id in eye_ids:
                color = (0, 255, 0)
            else:
                color = (255, 255, 255)
            cv2.circle(image, (cx, cy), 2, color, -1)
        cv2.imwrite('image_detect.png', image[:, :, ::-1])

    def get_one_face_xy_rotation_and_keypoints(self, image, mouth_bbox_scale = 1.2, eye_bbox_scale = 1.5, annotate_image: bool = False, save_vis=False):
        """Detect faces in one RGB frame and extract full per-face geometry.

        Args:
            image: HxWx3 uint8 array, fed to mediapipe as SRGB.
            mouth_bbox_scale: scale factor for the squared mouth crop.
            eye_bbox_scale: scale factor for the squared eye crops.
            annotate_image: when True, also return a landmark-annotated copy.
            save_vis: when True, draw debug boxes/points onto ``image``
                (mutating it) and write ``image_detect.png``.

        Returns:
            An 18-tuple, per-face lists unless noted:
              0  all_x / 1 all_y: nose offset from the cheek midpoint
                 (normalized coords; drives the orientation estimate)
              2  all_orientation: "left" / "right" / "forward"
              3  num_faces (int)
              4  all_keypoints: normalized (x, y, z) landmark triples
              5  all_bounding_box: face boxes [(x0, y0), (x1, y1)] in pixels
              6  all_mouth_bounding_box: squared mouth boxes [x0, y0, x1, y1]
              7  all_eye_bounding_box: {"left_eye": box, "right_eye": box}
              8  all_face_contour: rasters with one white pixel per landmark
              9  all_blendshapes: blendshape score lists
              10 all_facial_transformation_matrices (from mediapipe)
              11 annotated_image (or None)
              12 all_mouth_p / 13 all_nose_p: region box centers (pixels)
              14 all_left_eye_p / 15 all_right_eye_p: eye box centers
              16 all_eyeball: rasters with filled iris discs
              17 all_eyeball_mask: boolean HxWx1 masks of those discs
        """
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image)
        # Run the landmarker on the single frame.
        results = self.detector.detect(mp_image)
        max_h, max_w = image.shape[:2]
        annotated_image = draw_landmarks_on_image(image, results) if annotate_image else None
        all_x = []
        all_y = []
        all_orientation = []
        all_keypoints = []
        all_bounding_box = []
        all_mouth_bounding_box = []
        all_eye_bounding_box = []
        all_face_contour = []
        all_eyeball = []
        all_eyeball_mask = []
        all_blendshapes = []
        all_mouth_p = []
        all_nose_p = []
        all_left_eye_p = []
        all_right_eye_p = []
        num_faces = len(results.face_landmarks)
        for face_blendshapes in results.face_blendshapes:
            all_blendshapes.append([item.score for item in face_blendshapes])
        all_facial_transformation_matrices = results.facial_transformation_matrixes
        mouth_landmarks = self._MOUTH_LANDMARKS
        left_eye_landmarks = self._LEFT_EYE_LANDMARKS
        right_eye_landmarks = self._RIGHT_EYE_LANDMARKS
        for face_landmarks in results.face_landmarks:
            keypoints = []
            h, w = image.shape[0], image.shape[1]
            cx_min, cy_min = w, h
            cx_max, cy_max = 0, 0
            for idx, lm in enumerate(face_landmarks):
                # Clip landmarks that drift off-image before converting to
                # pixel coordinates, and track the face's pixel extent.
                cx, cy = int(np.clip(lm.x, 0, 1) * w), int(np.clip(lm.y, 0, 1) * h)
                cx_min = min(cx_min, cx)
                cy_min = min(cy_min, cy)
                cx_max = max(cx_max, cx)
                cy_max = max(cy_max, cy)
                keypoints.append((lm.x, lm.y, lm.z))
                if idx == 137:
                    right_cheek = (lm.x, lm.y, lm.z)
                if idx == 366:
                    left_cheek = (lm.x, lm.y, lm.z)
                if idx == 4:
                    nose = (lm.x, lm.y, lm.z)
            # Vector from the midpoint between the cheeks to the nose tip
            # approximates head yaw (x) / pitch (y).
            face_middle = (
                (right_cheek[0] + left_cheek[0]) / 2.0,
                (right_cheek[1] + left_cheek[1]) / 2.0,
            )
            x = nose[0] - face_middle[0]
            y = nose[1] - face_middle[1]
            if x > 0.15:
                orientation = "left"
            elif x < -0.15:
                orientation = "right"
            else:
                orientation = "forward"
            bounding_box = [(cx_min, cy_min), (cx_max, cy_max)]
            all_keypoints.append(keypoints)
            all_bounding_box.append(bounding_box)
            all_x.append(x)
            all_y.append(y)
            all_orientation.append(orientation)
            # Mouth bbox from the lip contour landmarks.
            mouth_x = [int(np.clip(face_landmarks[idx].x, 0, 1) * w) for idx in mouth_landmarks]
            mouth_y = [int(np.clip(face_landmarks[idx].y, 0, 1) * h) for idx in mouth_landmarks]
            mouth_bbox = [(min(mouth_x), min(mouth_y)), (max(mouth_x), max(mouth_y))]
            # Center of the (pre-squaring) mouth bbox.
            # BUGFIX: the y component previously averaged max-x with max-y.
            mouth_p = np.array([(mouth_bbox[0][0] + mouth_bbox[1][0]) / 2, (mouth_bbox[0][1] + mouth_bbox[1][1]) / 2])
            mouth_bbox = convert_bbox_to_square_bbox(mouth_bbox, max_h, max_w, scale=mouth_bbox_scale)
            # Nose bbox (never squared) and its center.
            nose_landmarks = self._NOSE_LANDMARKS
            nose_x = [int(np.clip(face_landmarks[idx].x, 0, 1) * w) for idx in nose_landmarks]
            nose_y = [int(np.clip(face_landmarks[idx].y, 0, 1) * h) for idx in nose_landmarks]
            nose_bbox = [(min(nose_x), min(nose_y)), (max(nose_x), max(nose_y))]
            # BUGFIX: same x/y mix-up as mouth_p above.
            nose_p = np.array([(nose_bbox[0][0] + nose_bbox[1][0]) / 2, (nose_bbox[0][1] + nose_bbox[1][1]) / 2])
            all_mouth_bounding_box.append(mouth_bbox)
            # Eye bboxes from the eye-ring landmarks.
            left_eye_x = [int(np.clip(face_landmarks[idx].x, 0, 1) * w) for idx in left_eye_landmarks]
            left_eye_y = [int(np.clip(face_landmarks[idx].y, 0, 1) * h) for idx in left_eye_landmarks]
            left_eye_bbox = [(min(left_eye_x), min(left_eye_y)), (max(left_eye_x), max(left_eye_y))]
            left_size = max(left_eye_y) - min(left_eye_y)  # vertical eye extent in px
            # BUGFIX: same x/y mix-up as mouth_p above.
            left_eye_p = np.array([(left_eye_bbox[0][0] + left_eye_bbox[1][0]) / 2, (left_eye_bbox[0][1] + left_eye_bbox[1][1]) / 2])
            left_eye_bbox = convert_bbox_to_square_bbox(left_eye_bbox, max_h, max_w, scale=eye_bbox_scale)
            right_eye_x = [int(np.clip(face_landmarks[idx].x, 0, 1) * w) for idx in right_eye_landmarks]
            right_eye_y = [int(np.clip(face_landmarks[idx].y, 0, 1) * h) for idx in right_eye_landmarks]
            right_eye_bbox = [(min(right_eye_x), min(right_eye_y)), (max(right_eye_x), max(right_eye_y))]
            right_size = max(right_eye_y) - min(right_eye_y)
            # BUGFIX: same x/y mix-up as mouth_p above.
            right_eye_p = np.array([(right_eye_bbox[0][0] + right_eye_bbox[1][0]) / 2, (right_eye_bbox[0][1] + right_eye_bbox[1][1]) / 2])
            right_eye_bbox = convert_bbox_to_square_bbox(right_eye_bbox, max_h, max_w, scale=eye_bbox_scale)
            eye_bbox = {"left_eye": left_eye_bbox, "right_eye": right_eye_bbox}
            all_eye_bounding_box.append(eye_bbox)
            # Rasterize every landmark as a single white pixel.
            face_contour = np.zeros_like(image)
            for landmark_id, landmark in enumerate(face_landmarks):
                cx, cy = int(landmark.x * w), int(landmark.y * h)
                if 0 <= cy < max_h and 0 <= cx < max_w:
                    face_contour[cy, cx] = (255, 255, 255)
            # Fill iris discs.  Landmarks 468 / 473 are the iris centers
            # (refined-landmarks model -- presumably; verify the model emits
            # 478 landmarks); radius ~ 1/3 of the eye's vertical extent.
            eyeball = np.zeros_like(image)
            for landmark_id, landmark in enumerate(face_landmarks):
                if landmark_id not in (468, 473):
                    continue
                cx, cy = int(landmark.x * w), int(landmark.y * h)
                if not (0 <= cy < max_h and 0 <= cx < max_w):
                    continue
                radius = int(left_size // 3) if landmark_id == 468 else int(right_size // 3)
                cv2.circle(eyeball, (cx, cy), radius=radius, color=(255, 0, 0), thickness=-1)
            eyeball_mask = (eyeball.sum(axis=2) != 0)[:, :, None]
            all_eyeball.append(eyeball)
            all_eyeball_mask.append(eyeball_mask)
            all_face_contour.append(face_contour)
            all_mouth_p.append(mouth_p)
            all_nose_p.append(nose_p)
            all_left_eye_p.append(left_eye_p)
            all_right_eye_p.append(right_eye_p)
            if save_vis:
                self._save_debug_visualization(image, mouth_bbox, eye_bbox, face_landmarks, w, h)
        return (
            all_x,
            all_y,
            all_orientation,
            num_faces,
            all_keypoints,
            all_bounding_box,
            all_mouth_bounding_box,
            all_eye_bounding_box,
            all_face_contour,
            all_blendshapes,
            all_facial_transformation_matrices,
            annotated_image,
            all_mouth_p,  # 12
            all_nose_p,  # 13
            all_left_eye_p,  # 14
            all_right_eye_p,  # 15
            all_eyeball,  # 16
            all_eyeball_mask,  # 17
        )

    def get_face_xy_rotation_and_keypoints(self, image, mouth_bbox_scale = 1.2, eye_bbox_scale = 1.5, annotate_image: bool = False, save_vis=False):
        """Detect faces in one RGB frame; reduced variant of
        :meth:`get_one_face_xy_rotation_and_keypoints` that skips the region
        center points and iris masks.

        Args:
            image: HxWx3 uint8 array, fed to mediapipe as SRGB.
            mouth_bbox_scale: scale factor for the squared mouth crop.
            eye_bbox_scale: scale factor for the squared eye crops.
            annotate_image: when True, also return a landmark-annotated copy.
            save_vis: when True, draw debug boxes/points onto ``image``
                (mutating it) and write ``image_detect.png``.

        Returns:
            A 12-tuple matching elements 0-11 of the extended variant:
            (all_x, all_y, all_orientation, num_faces, all_keypoints,
            all_bounding_box, all_mouth_bounding_box, all_eye_bounding_box,
            all_face_contour, all_blendshapes,
            all_facial_transformation_matrices, annotated_image).
        """
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image)
        # Run the landmarker on the single frame.
        results = self.detector.detect(mp_image)
        max_h, max_w = image.shape[:2]
        annotated_image = draw_landmarks_on_image(image, results) if annotate_image else None
        all_x = []
        all_y = []
        all_orientation = []
        all_keypoints = []
        all_bounding_box = []
        all_mouth_bounding_box = []
        all_eye_bounding_box = []
        all_face_contour = []
        all_blendshapes = []
        num_faces = len(results.face_landmarks)
        for face_blendshapes in results.face_blendshapes:
            all_blendshapes.append([item.score for item in face_blendshapes])
        all_facial_transformation_matrices = results.facial_transformation_matrixes
        mouth_landmarks = self._MOUTH_LANDMARKS
        left_eye_landmarks = self._LEFT_EYE_LANDMARKS
        right_eye_landmarks = self._RIGHT_EYE_LANDMARKS
        for face_landmarks in results.face_landmarks:
            keypoints = []
            h, w = image.shape[0], image.shape[1]
            cx_min, cy_min = w, h
            cx_max, cy_max = 0, 0
            for idx, lm in enumerate(face_landmarks):
                # Clip landmarks that drift off-image before converting to
                # pixel coordinates, and track the face's pixel extent.
                cx, cy = int(np.clip(lm.x, 0, 1) * w), int(np.clip(lm.y, 0, 1) * h)
                cx_min = min(cx_min, cx)
                cy_min = min(cy_min, cy)
                cx_max = max(cx_max, cx)
                cy_max = max(cy_max, cy)
                keypoints.append((lm.x, lm.y, lm.z))
                if idx == 137:
                    right_cheek = (lm.x, lm.y, lm.z)
                if idx == 366:
                    left_cheek = (lm.x, lm.y, lm.z)
                if idx == 4:
                    nose = (lm.x, lm.y, lm.z)
            # Vector from the midpoint between the cheeks to the nose tip
            # approximates head yaw (x) / pitch (y).
            face_middle = (
                (right_cheek[0] + left_cheek[0]) / 2.0,
                (right_cheek[1] + left_cheek[1]) / 2.0,
            )
            x = nose[0] - face_middle[0]
            y = nose[1] - face_middle[1]
            if x > 0.15:
                orientation = "left"
            elif x < -0.15:
                orientation = "right"
            else:
                orientation = "forward"
            bounding_box = [(cx_min, cy_min), (cx_max, cy_max)]
            all_keypoints.append(keypoints)
            all_bounding_box.append(bounding_box)
            all_x.append(x)
            all_y.append(y)
            all_orientation.append(orientation)
            # Mouth bbox from the lip contour landmarks.
            mouth_x = [int(np.clip(face_landmarks[idx].x, 0, 1) * w) for idx in mouth_landmarks]
            mouth_y = [int(np.clip(face_landmarks[idx].y, 0, 1) * h) for idx in mouth_landmarks]
            mouth_bbox = [(min(mouth_x), min(mouth_y)), (max(mouth_x), max(mouth_y))]
            mouth_bbox = convert_bbox_to_square_bbox(mouth_bbox, max_h, max_w, scale=mouth_bbox_scale)
            all_mouth_bounding_box.append(mouth_bbox)
            # Eye bboxes from the eye-ring landmarks.
            left_eye_x = [int(np.clip(face_landmarks[idx].x, 0, 1) * w) for idx in left_eye_landmarks]
            left_eye_y = [int(np.clip(face_landmarks[idx].y, 0, 1) * h) for idx in left_eye_landmarks]
            left_eye_bbox = [(min(left_eye_x), min(left_eye_y)), (max(left_eye_x), max(left_eye_y))]
            left_eye_bbox = convert_bbox_to_square_bbox(left_eye_bbox, max_h, max_w, scale=eye_bbox_scale)
            right_eye_x = [int(np.clip(face_landmarks[idx].x, 0, 1) * w) for idx in right_eye_landmarks]
            right_eye_y = [int(np.clip(face_landmarks[idx].y, 0, 1) * h) for idx in right_eye_landmarks]
            right_eye_bbox = [(min(right_eye_x), min(right_eye_y)), (max(right_eye_x), max(right_eye_y))]
            right_eye_bbox = convert_bbox_to_square_bbox(right_eye_bbox, max_h, max_w, scale=eye_bbox_scale)
            eye_bbox = {"left_eye": left_eye_bbox, "right_eye": right_eye_bbox}
            all_eye_bounding_box.append(eye_bbox)
            # Rasterize every landmark as a single white pixel.
            face_contour = np.zeros_like(image)
            for landmark_id, landmark in enumerate(face_landmarks):
                cx, cy = int(landmark.x * w), int(landmark.y * h)
                if 0 <= cy < max_h and 0 <= cx < max_w:
                    face_contour[cy, cx] = (255, 255, 255)
            all_face_contour.append(face_contour)
            if save_vis:
                # cv2 is imported at module level; the redundant local import
                # was removed.
                self._save_debug_visualization(image, mouth_bbox, eye_bbox, face_landmarks, w, h)
        return (
            all_x,
            all_y,
            all_orientation,
            num_faces,
            all_keypoints,
            all_bounding_box,
            all_mouth_bounding_box,
            all_eye_bounding_box,
            all_face_contour,
            all_blendshapes,
            all_facial_transformation_matrices,
            annotated_image,
        )