|
|
from utils.dependencies.insightface.app import FaceAnalysis |
|
|
from utils.dependencies.insightface.app.common import Face |
|
|
from utils.timer import Timer |
|
|
from utils.human_landmark_runner import LandmarkRunner as HumanLandmark |
|
|
from utils.crop import crop_image |
|
|
from typing import List, Tuple, Union |
|
|
from dataclasses import dataclass, field |
|
|
import numpy as np |
|
|
import os.path as osp |
|
|
import cv2 |
|
|
|
|
|
|
|
|
def contiguous(obj):
    """Return ``obj`` laid out C-contiguously in memory.

    An array that is already C-contiguous is handed back untouched (same
    object); otherwise a C-ordered copy is returned. Useful after slicing
    tricks such as channel reversal, which yield non-contiguous views.
    """
    return obj if obj.flags.c_contiguous else obj.copy(order="C")
|
|
|
|
|
@dataclass
class Trajectory:
    """Mutable record of one tracked face across consecutive frames.

    ``start`` and ``end`` are frame indices; both begin at -1, which the
    tracking code in this file treats as "tracking has not started yet".
    Every list-like field gets its own fresh empty list per instance.
    """

    # Frame index range covered by the track (-1 until the first frame).
    start: int = -1
    end: int = -1

    # One landmark entry per processed frame (appended during tracking).
    lmk_lst: Union[Tuple, List, np.ndarray] = field(default_factory=list)
    # NOTE(review): not populated anywhere in this file; presumably
    # per-frame bounding boxes -- confirm against callers.
    bbox_lst: Union[Tuple, List, np.ndarray] = field(default_factory=list)
    # NOTE(review): presumably per-frame crop-to-original transforms,
    # judging by the name only -- confirm.
    M_c2o_lst: Union[Tuple, List, np.ndarray] = field(default_factory=list)

    # NOTE(review): the remaining fields are unused in this file; the names
    # suggest original frames, landmarks in crop space and cropped frames.
    frame_rgb_lst: Union[Tuple, List, np.ndarray] = field(default_factory=list)
    lmk_crop_lst: Union[Tuple, List, np.ndarray] = field(default_factory=list)
    frame_rgb_crop_lst: Union[Tuple, List, np.ndarray] = field(default_factory=list)
|
|
|
|
|
|
|
|
def make_abs_path(fn):
    """Resolve ``fn`` relative to the directory containing this module.

    The module location is taken from ``__file__`` after symlink
    resolution, so the result does not depend on the current working
    directory.
    """
    module_file = osp.realpath(__file__)
    module_dir = osp.dirname(module_file)
    return osp.join(module_dir, fn)
|
|
|
|
|
|
|
|
def sort_by_direction(faces, direction: str = 'large-small', face_center=None):
    """Order detected faces according to a named rule.

    Each face must support ``face['bbox']`` returning ``(x1, y1, x2, y2)``.

    Supported ``direction`` values:
        'left-right' / 'right-left'   -- by the bbox left edge (x1)
        'top-bottom' / 'bottom-top'   -- by the bbox top edge (y1)
        'small-large' / 'large-small' -- by bbox area
        'distance-from-retarget-face' -- by distance from the bbox centre to
                                         ``face_center``, nearest first

    Args:
        faces: sequence of faces; an empty sequence is returned as-is.
        direction: ordering rule; unknown values leave ``faces`` untouched.
        face_center: ``(x, y)`` reference point, required only for
            'distance-from-retarget-face'.

    Returns:
        A new sorted list, or ``faces`` itself when it is empty or the
        direction is not recognised.

    Raises:
        TypeError: if 'distance-from-retarget-face' is requested without a
            ``face_center``.
    """
    if len(faces) <= 0:
        return faces

    def left_edge(face):
        return face['bbox'][0]

    def top_edge(face):
        return face['bbox'][1]

    def area(face):
        box = face['bbox']
        return (box[2] - box[0]) * (box[3] - box[1])

    if direction == 'distance-from-retarget-face':
        if face_center is None:
            # Previously this surfaced as an opaque "'NoneType' object is not
            # subscriptable" from inside the sort key; keep the exception
            # type but say what is actually missing.
            raise TypeError(
                "sort_by_direction: face_center is required when "
                "direction is 'distance-from-retarget-face'"
            )

        def distance_to_center(face):
            box = face['bbox']
            dx = (box[2] + box[0]) / 2 - face_center[0]
            dy = (box[3] + box[1]) / 2 - face_center[1]
            return (dx ** 2 + dy ** 2) ** 0.5

        return sorted(faces, key=distance_to_center)

    # direction -> (sort key, descending?)
    orderings = {
        'left-right': (left_edge, False),
        'right-left': (left_edge, True),
        'top-bottom': (top_edge, False),
        'bottom-top': (top_edge, True),
        'small-large': (area, False),
        'large-small': (area, True),
    }
    if direction in orderings:
        sort_key, descending = orderings[direction]
        return sorted(faces, key=sort_key, reverse=descending)

    # Unknown rule: keep the caller's order rather than guessing.
    return faces
|
|
|
|
|
|
|
|
class FaceAnalysisDIY(FaceAnalysis):
    """``FaceAnalysis`` subclass whose ``get`` sorts its results and can skip
    the 106-point landmark model; also carries a timer for ``warmup``."""

    def __init__(self, name='buffalo_l', root='~/.insightface', allowed_modules=None, **kwargs):
        """Forward every argument to ``FaceAnalysis`` and attach a ``Timer``."""
        super().__init__(name=name, root=root, allowed_modules=allowed_modules, **kwargs)
        self.timer = Timer()

    def get(self, img_bgr, **kwargs):
        """Detect faces in ``img_bgr``, run the per-face models, sort the result.

        Recognised keyword arguments:
            max_face_num: forwarded to the detector as ``max_num`` (default 0).
            flag_do_landmark_2d_106: when False the 'landmark_2d_106' model
                is skipped (default True).
            direction: ordering rule handed to ``sort_by_direction``
                (default 'large-small').

        Returns:
            A list of ``Face`` objects, or ``[]`` when nothing is detected.
        """
        face_limit = kwargs.get('max_face_num', 0)
        run_lmk_106 = kwargs.get('flag_do_landmark_2d_106', True)
        sort_rule = kwargs.get('direction', 'large-small')
        # No reference point is available here, so it is always passed as None.
        face_center = None

        bboxes, kpss = self.det_model.detect(img_bgr, max_num=face_limit, metric='default')
        if bboxes.shape[0] == 0:
            return []

        faces = []
        for idx, detection in enumerate(bboxes):
            # Each detection row carries four box coordinates, then the score.
            keypoints = kpss[idx] if kpss is not None else None
            face = Face(bbox=detection[0:4], kps=keypoints, det_score=detection[4])

            for taskname, model in self.models.items():
                skip_lmk_106 = taskname == 'landmark_2d_106' and not run_lmk_106
                # Detection has already run above; landmarks may be opted out.
                if taskname == 'detection' or skip_lmk_106:
                    continue
                model.get(img_bgr, face)

            faces.append(face)

        return sort_by_direction(faces, sort_rule, face_center)

    def warmup(self):
        """Run one pass over a blank 512x512 image and print the elapsed time."""
        self.timer.tic()

        blank_bgr = np.zeros((512, 512, 3), dtype=np.uint8)
        self.get(blank_bgr)

        elapse = self.timer.toc()
        print(f'FaceAnalysisDIY warmup time: {elapse:.3f}s')
|
|
|
|
|
|
|
|
class Cropper(object):
    """Face cropper combining a face detector with a landmark refinement model.

    On construction both models are created and warmed up. The public
    methods detect a face (picking one according to a sorting rule when
    several are found), refine its landmarks with the landmark runner and,
    for source images, crop the face region via ``crop_image``.
    """

    def __init__(self):
        """Set the fixed configuration, then build and warm up both models."""
        self.face_analysis_wrapper_provider = ["CPUExecutionProvider"]
        self.insightface_root: str = make_abs_path("./pretrained_weights/insightface")
        self.device_id = 0
        self.landmark_ckpt_path: str = make_abs_path("./pretrained_weights/liveportrait/landmark.onnx")
        self.det_thresh: float = 0.1
        self.device = "cpu"
        self.image_type = "human_face"
        # Rule used to pick one face when several are detected.
        self.direction: str = "large-small"
        self.max_face_num: int = 0
        # Crop parameters forwarded to crop_image().
        self.dsize: int = 512
        self.scale: float = 2.3
        self.vx_ratio: float = 0
        self.vy_ratio: float = -0.125
        self.flag_do_rot: bool = True

        self.face_analysis_wrapper = FaceAnalysisDIY(
            name="buffalo_l",
            root=self.insightface_root,
            providers=self.face_analysis_wrapper_provider,
        )
        self.face_analysis_wrapper.prepare(ctx_id=self.device_id, det_size=(512, 512), det_thresh=self.det_thresh)
        self.face_analysis_wrapper.warmup()

        self.human_landmark_runner = HumanLandmark(
            ckpt_path=self.landmark_ckpt_path,
            onnx_provider=self.device,
            device_id=self.device_id,
        )
        self.human_landmark_runner.warmup()

    def crop_source_image(self, img_rgb_: np.ndarray, debug_dir=None):
        """Detect the face in a source image and crop it.

        Args:
            img_rgb_: source image; it is copied before use and converted
                RGB -> BGR for the detector.
            debug_dir: optional existing directory. When given, the crop and
                its 256x256 version are written there as ``img_crop.jpg``
                and ``img_crop_256x256.jpg``. Nothing is written by default
                (this replaces an unconditional dump to a hard-coded,
                machine-specific path).

        Returns:
            The dict produced by ``crop_image`` extended with
            ``"img_crop_256x256"``, ``"lmk_crop"`` and, for human faces,
            ``"lmk_crop_256x256"``; ``None`` when no face is detected.
        """
        img_rgb = img_rgb_.copy()
        img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)

        if self.image_type == "human_face":
            src_face = self.face_analysis_wrapper.get(
                img_bgr,
                flag_do_landmark_2d_106=True,
                direction=self.direction,
                max_face_num=self.max_face_num,
            )

            if len(src_face) == 0:
                # `log` is not defined in this module; print matches warmup().
                print("No face detected in the source image.")
                return None
            elif len(src_face) > 1:
                print(f"More than one face detected in the image, only pick one face by rule {self.direction}.")

            # Results are already sorted by `direction`; take the first.
            src_face = src_face[0]
            lmk = src_face.landmark_2d_106
        else:
            # NOTE(review): this branch cannot run as written -- `Image`,
            # `self.animal_landmark_runner` and `self.animal_face_type` are
            # not defined anywhere in the visible file. Left untouched so the
            # animal path can be wired up later; confirm intended support.
            tmp_dct = {
                'animal_face_9': 'animal_face',
                'animal_face_68': 'face'
            }

            img_rgb_pil = Image.fromarray(img_rgb)
            lmk = self.animal_landmark_runner.run(
                img_rgb_pil,
                'face',
                tmp_dct[self.animal_face_type],
                0,
                0
            )

        ret_dct = crop_image(
            img_rgb,
            lmk,
            dsize=self.dsize,
            scale=self.scale,
            vx_ratio=self.vx_ratio,
            vy_ratio=self.vy_ratio,
            flag_do_rot=self.flag_do_rot,
        )

        # Also provide a 256x256 version of the crop.
        ret_dct["img_crop_256x256"] = cv2.resize(ret_dct["img_crop"], (256, 256), interpolation=cv2.INTER_AREA)

        if debug_dir is not None:
            # cv2.imwrite expects BGR; the crops presumably follow the RGB
            # order of the input, so swap channels first.
            cv2.imwrite(
                osp.join(debug_dir, "img_crop.jpg"),
                cv2.cvtColor(ret_dct["img_crop"], cv2.COLOR_RGB2BGR),
            )
            cv2.imwrite(
                osp.join(debug_dir, "img_crop_256x256.jpg"),
                cv2.cvtColor(ret_dct["img_crop_256x256"], cv2.COLOR_RGB2BGR),
            )

        if self.image_type == "human_face":
            # Refine the detector landmarks with the dedicated landmark model.
            lmk = self.human_landmark_runner.run(img_rgb, lmk)
            ret_dct["lmk_crop"] = lmk
            # Rescale landmarks from the dsize crop to the 256x256 crop.
            ret_dct["lmk_crop_256x256"] = ret_dct["lmk_crop"] * 256 / self.dsize
        else:
            ret_dct["lmk_crop"] = lmk

        return ret_dct

    def calc_lmk_from_cropped_image(self, img_rgb_, **kwargs):
        """Compute refined landmarks for an already-cropped image.

        Args:
            img_rgb_: image array; its last axis is reversed and made
                contiguous before being passed to the detector.
            **kwargs: ``direction`` selects the face-picking rule
                (default "large-small").

        Returns:
            The landmarks returned by the landmark runner, or ``None`` when
            no face is detected.
        """
        direction = kwargs.get("direction", "large-small")
        src_face = self.face_analysis_wrapper.get(
            contiguous(img_rgb_[..., ::-1]),
            flag_do_landmark_2d_106=True,
            direction=direction,
        )
        if len(src_face) == 0:
            # `log` is not defined in this module; print matches warmup().
            print("No face detected in the source image.")
            return None
        elif len(src_face) > 1:
            print(f"More than one face detected in the image, only pick one face by rule {direction}.")

        src_face = src_face[0]
        lmk = src_face.landmark_2d_106
        lmk = self.human_landmark_runner.run(img_rgb_, lmk)

        return lmk

    def calc_lmks_from_cropped_video(self, driving_rgb_crop_lst, **kwargs):
        """Tracking based landmarks/alignment.

        The detector runs only while no track has been started (in practice
        the first frame); every later frame is seeded with the previous
        frame's landmarks.

        Args:
            driving_rgb_crop_lst: iterable of cropped frames.
            **kwargs: ``direction`` selects the face-picking rule
                (default "large-small").

        Returns:
            A list with one landmark entry per frame.

        Raises:
            Exception: if no face is detected in a frame that needs detection.
        """
        trajectory = Trajectory()
        direction = kwargs.get("direction", "large-small")

        for idx, frame_rgb_crop in enumerate(driving_rgb_crop_lst):
            if idx == 0 or trajectory.start == -1:
                src_face = self.face_analysis_wrapper.get(
                    contiguous(frame_rgb_crop[..., ::-1]),
                    flag_do_landmark_2d_106=True,
                    direction=direction,
                )
                if len(src_face) == 0:
                    # `log` is not defined in this module; print matches warmup().
                    print(f"No face detected in the frame #{idx}")
                    raise Exception(f"No face detected in the frame #{idx}")
                elif len(src_face) > 1:
                    print(f"More than one face detected in the driving frame_{idx}, only pick one face by rule {direction}.")

                src_face = src_face[0]
                lmk = src_face.landmark_2d_106
                lmk = self.human_landmark_runner.run(frame_rgb_crop, lmk)
                trajectory.start, trajectory.end = idx, idx
            else:
                # Track: refine starting from the previous frame's landmarks.
                lmk = self.human_landmark_runner.run(frame_rgb_crop, trajectory.lmk_lst[-1])
                trajectory.end = idx

            trajectory.lmk_lst.append(lmk)

        return trajectory.lmk_lst
|
|
|
|
|
|