| import os |
| import numpy as np |
| import math |
| import json |
| import imageio |
| import torch |
| import tqdm |
| import cv2 |
|
|
| from data_util.face3d_helper import Face3DHelper |
| from utils.commons.euler2rot import euler_trans_2_c2w, c2w_to_euler_trans |
| from data_gen.utils.process_video.euler2quaterion import euler2quaterion, quaterion2euler |
| from deep_3drecon.deep_3drecon_models.bfm import ParametricFaceModel |
|
|
|
|
def euler2rot(euler_angle):
    """
    Convert batched Euler angles to rotation matrices.

    euler_angle: tensor indexed as [b, k]; columns 0, 1, 2 are used as the
                 angles of the x, y and z rotations (fed directly to cos/sin).
    return: [b, 3, 3] tensor equal to rot_x @ (rot_y @ rot_z)
    """
    n = euler_angle.shape[0]
    device = euler_angle.device
    ang_x, ang_y, ang_z = (euler_angle[:, k].reshape(-1, 1, 1) for k in range(3))

    # Constant entries are created with the default dtype and then moved to
    # the device of the input.
    one = torch.ones(n, 1, 1).to(device)
    zero = torch.zeros(n, 1, 1).to(device)

    def from_columns(*columns):
        # Each triple is stacked along dim 1 into a [b, 3, 1] column; the
        # three columns are then joined along dim 2 into a [b, 3, 3] matrix.
        return torch.cat([torch.cat(column, 1) for column in columns], 2)

    sin_x, cos_x = ang_x.sin(), ang_x.cos()
    sin_y, cos_y = ang_y.sin(), ang_y.cos()
    sin_z, cos_z = ang_z.sin(), ang_z.cos()

    rot_x = from_columns(
        (one, zero, zero),
        (zero, cos_x, sin_x),
        (zero, -sin_x, cos_x),
    )
    rot_y = from_columns(
        (cos_y, zero, -sin_y),
        (zero, one, zero),
        (sin_y, zero, cos_y),
    )
    rot_z = from_columns(
        (cos_z, -sin_z, zero),
        (sin_z, cos_z, zero),
        (zero, zero, one),
    )
    return torch.bmm(rot_x, torch.bmm(rot_y, rot_z))
|
|
|
|
def rot2euler(rot_mat):
    """
    Extract three Euler angles from batched rotation matrices.

    rot_mat: indexed as [b, 3, 3]
    return: [b, 3] tensor (theta_x, theta_y, theta_z); it is allocated with
            torch.zeros, i.e. with the default dtype/device, independent of
            where rot_mat lives.

    NOTE(review): theta_y is read from element [2, 0], while euler2rot in this
    file places sin(phi) at element [0, 2], so this is not an exact inverse of
    that function for the y angle -- confirm the intended convention.
    """
    r12 = rot_mat[:, 1, 2]
    r22 = rot_mat[:, 2, 2]
    # Magnitude of the (r12, r22) pair, used as the cosine term for theta_y.
    cos_y = torch.sqrt(r12 * r12 + r22 * r22)

    angles = (
        torch.atan2(-r12, r22),
        torch.atan2(rot_mat[:, 2, 0], cos_y),
        torch.atan2(rot_mat[:, 0, 1], rot_mat[:, 0, 0]),
    )

    euler_angles = torch.zeros([len(rot_mat), 3])
    for axis, angle in enumerate(angles):
        euler_angles[:, axis] = angle
    return euler_angles
|
|
# 68 indices used to pick a 68-point landmark subset out of a 468/478-point
# landmark array (see load_processed_data). The row comments give the position
# of each group inside the resulting 68-point array; positions 48-59 are the
# ones get_lip_rect slices as the lips.
index_lm68_from_lm468 = [
    # 0-16
    127, 234, 93, 132, 58, 136, 150, 176, 152, 400, 379, 365, 288, 361, 323, 454, 356,
    # 17-26
    70, 63, 105, 66, 107, 336, 296, 334, 293, 300,
    # 27-35
    168, 197, 5, 4, 75, 97, 2, 326, 305,
    # 36-47
    33, 160, 158, 133, 153, 144, 362, 385, 387, 263, 373, 380,
    # 48-59
    61, 40, 37, 0, 267, 270, 291, 321, 314, 17, 84, 91,
    # 60-67
    78, 81, 13, 311, 308, 402, 14, 178,
]
|
|
def plot_lm2d(lm2d):
    """
    Draw 2D landmarks and their indices on a white 512x512 canvas.

    lm2d: sequence of (x, y) pairs, in pixel coordinates of the canvas
    return: [512, 512, 3] uint8 image
    """
    canvas_size = 512
    marker_color = (255, 0, 0)
    img = np.full([canvas_size, canvas_size, 3], 255, dtype=np.uint8)

    # Coordinates are truncated to ints once and reused by both passes.
    points = [(int(x), int(y)) for x, y in lm2d]

    # First pass: one filled dot per landmark.
    for point in points:
        img = cv2.circle(img, center=point, radius=3, color=marker_color, thickness=-1)

    # Second pass: label each landmark with its index.
    font = cv2.FONT_HERSHEY_SIMPLEX
    for label, point in enumerate(points):
        img = cv2.putText(img, f"{label}", org=point, fontFace=font, fontScale=0.3, color=(255, 0, 0))
    return img
|
|
def get_face_rect(lms, h, w):
    """
    Compute a face bounding box from 68 2D landmarks.

    lms: [68, 2]
    h, w: int, image height and width
    return: [4,] as [x_min, x_max, y_min, y_max]
    """
    assert len(lms) == 68

    xs = lms[:, 0]
    left, right = np.min(xs), np.max(xs)

    # Box centre: horizontal midpoint of all landmarks, y of landmark 27.
    # (presumably the nose bridge in the 68-point layout -- confirm)
    cx = int((left + right) / 2.0)
    cy = int(lms[27, 1])

    # Half extents: 1.5x the horizontal half-span, and 1.15x the vertical
    # distance from the centre down to landmark 8 (presumably the chin).
    half_w = int((right - cx) * 1.5)
    half_h = int((lms[8, 1] - cy) * 1.15)

    # Top-left corner is clamped into the image ...
    x0 = max(cx - half_w, 0)
    y0 = max(cy - half_h, 0)
    # ... and the extent is limited so the far edge stays <= w-1 / h-1.
    x1 = x0 + min(w - 1 - x0, 2 * half_w)
    y1 = y0 + min(h - 1 - y0, 2 * half_h)
    return [x0, x1, y0, y1]
|
|
def get_lip_rect(lms, h, w):
    """
    Compute a square bounding box around the lip landmarks.

    lms: [68, 2]; rows 48..59 are used as the lip points
    h, w: int, image height and width
    return: [4,] as [x_min, x_max, y_min, y_max]
    """
    assert len(lms) == 68

    lip_pts = lms[48:60]
    lip_xs, lip_ys = lip_pts[:, 0], lip_pts[:, 1]
    min_x, max_x = np.min(lip_xs), np.max(lip_xs)
    min_y, max_y = np.min(lip_ys), np.max(lip_ys)
    cx = int((min_x + max_x) / 2.0)
    cy = int((min_y + max_y) / 2.0)

    # Use the larger of the two (1.2x enlarged) half extents so the box is square.
    half = max(int((max_x - cx) * 1.2), int((max_y - cy) * 1.2))
    side = 2 * half

    # Clamp the origin at 0 first, then shift the box back if it overruns
    # the right / bottom border.
    rect_x = max(cx - half, 0)
    rect_y = max(cy - half, 0)
    if rect_x + side > w:
        rect_x = w - side
    if rect_y + side > h:
        rect_y = h - side

    return [rect_x, rect_x + side, rect_y, rect_y + side]
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| def get_win_conds(conds, idx, smo_win_size=8, pad_option='zero'): |
| """ |
| conds: [b, t=16, h=29] |
| idx: long, time index of the selected frame |
| """ |
| idx = max(0, idx) |
| idx = min(idx, conds.shape[0]-1) |
| smo_half_win_size = smo_win_size//2 |
| left_i = idx - smo_half_win_size |
| right_i = idx + (smo_win_size - smo_half_win_size) |
| pad_left, pad_right = 0, 0 |
| if left_i < 0: |
| pad_left = -left_i |
| left_i = 0 |
| if right_i > conds.shape[0]: |
| pad_right = right_i - conds.shape[0] |
| right_i = conds.shape[0] |
| conds_win = conds[left_i:right_i] |
| if pad_left > 0: |
| if pad_option == 'zero': |
| conds_win = np.concatenate([np.zeros_like(conds_win)[:pad_left], conds_win], axis=0) |
| elif pad_option == 'edge': |
| edge_value = conds[0][np.newaxis, ...] |
| conds_win = np.concatenate([edge_value] * pad_left + [conds_win], axis=0) |
| else: |
| raise NotImplementedError |
| if pad_right > 0: |
| if pad_option == 'zero': |
| conds_win = np.concatenate([conds_win, np.zeros_like(conds_win)[:pad_right]], axis=0) |
| elif pad_option == 'edge': |
| edge_value = conds[-1][np.newaxis, ...] |
| conds_win = np.concatenate([conds_win] + [edge_value] * pad_right , axis=0) |
| else: |
| raise NotImplementedError |
| assert conds_win.shape[0] == smo_win_size |
| return conds_win |
|
|
|
|
def load_processed_data(processed_dir):
    """
    Gather the pre-processed assets of one video into a single dict.

    processed_dir: directory holding bg.jpg, aud_hubert.npy, aud_mel_f0.npy,
                   coeff_fit_mp.npy, lms_2d.npy and the head_imgs /
                   inpaint_torso_imgs / gt_imgs frame folders.
    return: dict with the background image and its size, camera intrinsics,
            3DMM coefficients, id/exp landmarks with their mean/std, audio
            features, and the 'train_samples' / 'val_samples' lists.

    NOTE: reads the module-level names `face_model` and `face3d_helper`, which
    this file only assigns inside the `if __name__ == '__main__'` section.
    """
    def in_processed_dir(name):
        return os.path.join(processed_dir, name)

    background_img_name = in_processed_dir("bg.jpg")
    assert os.path.exists(background_img_name)
    head_img_dir = in_processed_dir("head_imgs")
    torso_img_dir = in_processed_dir("inpaint_torso_imgs")
    gt_img_dir = in_processed_dir("gt_imgs")

    hubert_npy_name = in_processed_dir("aud_hubert.npy")
    mel_f0_npy_name = in_processed_dir("aud_mel_f0.npy")
    coeff_npy_name = in_processed_dir("coeff_fit_mp.npy")
    lm2d_npy_name = in_processed_dir("lms_2d.npy")

    # Background image, image size and camera intrinsics.
    bg_img = imageio.imread(background_img_name)
    img_h, img_w = bg_img.shape[:2]
    ret_dict = {
        'bg_img': bg_img,
        'H': img_h,
        'W': img_w,
        'focal': face_model.focal,
        'cx': face_model.center,
        'cy': face_model.center,
    }

    # Per-frame face / lip rectangles from the 2D landmarks.
    print("loading lm2d coeff ...")
    face_rect_lst, lip_rect_lst = [], []
    for lm2d in np.load(lm2d_npy_name):
        if len(lm2d) in (468, 478):
            # Dense landmark sets are reduced to the 68-point subset first.
            lm2d = lm2d[index_lm68_from_lm468]
        face_rect_lst.append(get_face_rect(lm2d, img_h, img_w))
        lip_rect_lst.append(get_lip_rect(lm2d, img_h, img_w))
    face_rects = np.stack(face_rect_lst, axis=0)

    # Fitted 3DMM coefficients.
    # NOTE(review): allow_pickle=True unpickles the file contents; only load
    # files produced by a trusted pipeline.
    print("loading fitted 3dmm coeff ...")
    coeff_dict = np.load(coeff_npy_name, allow_pickle=True).tolist()
    identity_arr = ret_dict['id'] = coeff_dict['id']
    exp_arr = ret_dict['exp'] = coeff_dict['exp']
    euler_arr = ret_dict['euler'] = coeff_dict['euler']
    trans_arr = ret_dict['trans'] = coeff_dict['trans']

    print("calculating lm3d ...")
    idexp_lm3d = face3d_helper.reconstruct_idexp_lm3d(
        torch.from_numpy(identity_arr), torch.from_numpy(exp_arr))
    idexp_lm3d_arr = idexp_lm3d.cpu().numpy().reshape([-1, 68*3])
    len_motion = len(idexp_lm3d_arr)
    ret_dict['idexp_lm3d'] = idexp_lm3d_arr
    ret_dict['idexp_lm3d_mean'] = idexp_lm3d_arr.mean(axis=0)
    ret_dict['idexp_lm3d_std'] = idexp_lm3d_arr.std(axis=0)

    # Head pose -> per-frame 4x4 matrices stored as 'c2w' on every sample.
    eulers = torch.FloatTensor(euler_arr)
    trans = torch.FloatTensor(trans_arr)
    rots = face_model.compute_rotation(eulers)

    # z is first flipped around 10 and then negated (net effect: z - 10), and
    # the whole translation is scaled by 1/10.
    # (10 presumably matches the camera_distance given to the face model -- confirm)
    trans[:, 2] = 10 - trans[:, 2]
    trans[:, 2] = - trans[:, 2]
    trans = trans / 10.0

    # The rotation is transposed twice, so rots_inv holds the same values as
    # the output of compute_rotation.
    rots_transposed = rots.permute(0, 2, 1)
    rots_inv = rots_transposed.permute(0, 2, 1)
    trans_inv = - torch.bmm(rots_inv, trans.unsqueeze(2))

    pose = torch.eye(4, dtype=torch.float32).unsqueeze(0).repeat([len_motion, 1, 1])
    pose[:, :3, :3] = rots_inv
    pose[:, :3, 3] = trans_inv[:, :, 0]
    c2w_transform_matrices = pose.numpy()

    # Audio features.
    print("loading hubert ...")
    hubert_features = np.load(hubert_npy_name)
    print("loading Mel and F0 ...")
    mel_f0_features = np.load(mel_f0_npy_name, allow_pickle=True).tolist()

    ret_dict['hubert'] = hubert_features
    ret_dict['mel'] = mel_f0_features['mel']
    ret_dict['f0'] = mel_f0_features['f0']

    # Sequential split: the first (len_motion // 11 * 10) frames are used for
    # training, the remaining frames for validation.
    frame_indices = list(range(len_motion))
    num_train = len_motion // 11 * 10
    splits = (
        ('train_samples', frame_indices[:num_train]),
        ('val_samples', frame_indices[num_train:]),
    )
    for samples_key, indices in splits:
        ret_dict[samples_key] = [
            {
                'idx': idx,
                'head_img_fname': os.path.join(head_img_dir, f"{idx:08d}.png"),
                'torso_img_fname': os.path.join(torso_img_dir, f"{idx:08d}.png"),
                'gt_img_fname': os.path.join(gt_img_dir, f"{idx:08d}.jpg"),
                'face_rect': face_rects[idx],
                'lip_rect': lip_rect_lst[idx],
                'c2w': c2w_transform_matrices[idx],
            }
            for idx in indices
        ]
    return ret_dict
|
|
|
|
class Binarizer:
    """Packs the processed data of one video into a single .npy dataset file."""

    def __init__(self):
        # Root folder that contains both 'processed/videos' and 'binary/videos'.
        self.data_dir = 'data/'

    def parse(self, video_id):
        """
        Build and save the train/val dataset of `video_id`.

        Reads <data_dir>/processed/videos/<video_id> and writes
        <data_dir>/binary/videos/<video_id>/trainval_dataset.npy.
        """
        src_dir, dst_dir = (
            os.path.join(self.data_dir, sub_dir, video_id)
            for sub_dir in ('processed/videos', 'binary/videos')
        )
        os.makedirs(dst_dir, exist_ok=True)

        dataset = load_processed_data(src_dir)

        # Every entry of the mel/f0 file is merged into the top level of the
        # dataset dict.
        audio_entries = np.load(os.path.join(src_dir, 'aud_mel_f0.npy'), allow_pickle=True).tolist()
        dataset.update(audio_entries)

        np.save(os.path.join(dst_dir, "trainval_dataset.npy"), dataset, allow_pickle=True)
|
|
|
|
|
|
if __name__ == '__main__':
    import argparse

    # Command line: which video to binarize.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--video_id', type=str, default='May', help='')
    video_id = arg_parser.parse_args().video_id

    # These two objects must stay module-level names: load_processed_data
    # looks them up as globals.
    face_model = ParametricFaceModel(
        bfm_folder='deep_3drecon/BFM',
        camera_distance=10,
        focal=1015,
    )
    face_model.to("cpu")
    face3d_helper = Face3DHelper()

    Binarizer().parse(video_id)
    print(f"Binarization for {video_id} Done!")
|
|