| from scipy.spatial import ConvexHull |
| import torch |
| import torch.nn.functional as F |
| import numpy as np |
| from tqdm import tqdm |
|
|
def normalize_kp(kp_source, kp_driving, kp_driving_initial, adapt_movement_scale=False,
                 use_relative_movement=False, use_relative_jacobian=False):
    """Re-express driving keypoints relative to the source keypoints.

    Args:
        kp_source: dict with a ``'value'`` tensor (and ``'jacobian'`` when
            relative jacobians are requested) for the source.
        kp_driving: dict of the same layout for the current driving frame.
        kp_driving_initial: dict of the same layout for the first driving frame.
        adapt_movement_scale: when truthy, scale the keypoint displacement by
            ``sqrt(source hull volume) / sqrt(initial driving hull volume)``,
            computed from the first batch element only.
        use_relative_movement: when truthy, replace ``'value'`` with
            ``source + scale * (driving - driving_initial)``.
        use_relative_jacobian: when truthy (and relative movement is enabled),
            replace ``'jacobian'`` with
            ``driving @ inverse(driving_initial) @ source``.

    Returns:
        A new dict (shallow copy of ``kp_driving``) with the updated entries.
    """
    def _hull_volume(kp):
        # Only the first batch element is used to measure the hull.
        points = kp['value'][0].data.cpu().numpy()
        return ConvexHull(points).volume

    scale = 1
    if adapt_movement_scale:
        scale = np.sqrt(_hull_volume(kp_source)) / np.sqrt(_hull_volume(kp_driving_initial))

    result = dict(kp_driving)
    if not use_relative_movement:
        return result

    displacement = kp_driving['value'] - kp_driving_initial['value']
    displacement *= scale
    result['value'] = displacement + kp_source['value']

    # NOTE(review): the jacobian branch is treated as nested inside the
    # relative-movement branch; confirm against the intended indentation.
    if use_relative_jacobian:
        relative = torch.matmul(kp_driving['jacobian'],
                                torch.inverse(kp_driving_initial['jacobian']))
        result['jacobian'] = torch.matmul(relative, kp_source['jacobian'])

    return result
|
|
def headpose_pred_to_degree(pred):
    """Convert binned head-pose logits into an expected angle.

    The logits are turned into probabilities with a softmax over dimension 1,
    the expected bin index (bins ``0..65``) is computed, and the result is
    mapped through ``index * 3 - 99``.

    Args:
        pred: tensor of logits whose dimension 1 broadcasts against the 66 bin
            indices (presumably shape ``(batch, 66)`` -- confirm with callers).

    Returns:
        Tensor with dimension 1 reduced away, holding one angle per row.
    """
    bin_index = torch.arange(66, dtype=pred.dtype, device=pred.device)
    # Name the softmax dimension explicitly: the implicit choice is deprecated
    # and emits a warning on every call. dim=1 is the dimension summed below.
    prob = F.softmax(pred, dim=1)
    return torch.sum(prob * bin_index, 1) * 3 - 99
|
|
def get_rotation_matrix(yaw, pitch, roll):
    """Build batched 3x3 rotation matrices from yaw, pitch and roll angles.

    Each angle is converted with ``angle / 180 * 3.14`` and the three
    single-axis rotations are combined as ``pitch_mat @ yaw_mat @ roll_mat``.

    Args:
        yaw, pitch, roll: angle tensors, one entry per batch element
            (presumably in degrees, given the ``/ 180`` conversion).

    Returns:
        Tensor of shape ``(batch, 3, 3)``.
    """
    def _to_radians(angle):
        # The literal 3.14 (rather than pi) is reproduced deliberately so the
        # numerical output stays the same.
        return (angle / 180 * 3.14).unsqueeze(1)

    def _as_3x3(entries):
        # Nine column tensors in row-major order -> one (batch, 3, 3) matrix.
        flat = torch.cat(entries, dim=1)
        return flat.view(flat.shape[0], 3, 3)

    y = _to_radians(yaw)
    p = _to_radians(pitch)
    r = _to_radians(roll)

    # Rotation about the x axis (pitch).
    one_p, zero_p = torch.ones_like(p), torch.zeros_like(p)
    cos_p, sin_p = torch.cos(p), torch.sin(p)
    rot_x = _as_3x3([one_p, zero_p, zero_p,
                     zero_p, cos_p, -sin_p,
                     zero_p, sin_p, cos_p])

    # Rotation about the y axis (yaw).
    one_y, zero_y = torch.ones_like(y), torch.zeros_like(y)
    cos_y, sin_y = torch.cos(y), torch.sin(y)
    rot_y = _as_3x3([cos_y, zero_y, sin_y,
                     zero_y, one_y, zero_y,
                     -sin_y, zero_y, cos_y])

    # Rotation about the z axis (roll).
    one_r, zero_r = torch.ones_like(r), torch.zeros_like(r)
    cos_r, sin_r = torch.cos(r), torch.sin(r)
    rot_z = _as_3x3([cos_r, -sin_r, zero_r,
                     sin_r, cos_r, zero_r,
                     zero_r, zero_r, one_r])

    return torch.einsum('bij,bjk,bkm->bim', rot_x, rot_y, rot_z)
|
|
def keypoint_transformation(kp_canonical, he, wo_exp=False):
    """Apply head rotation, translation and expression offsets to keypoints.

    Args:
        kp_canonical: dict whose ``'value'`` entry holds the canonical
            keypoints (indexed as ``(batch, keypoint, coord)`` by the einsum
            below).
        he: dict with ``'yaw'``, ``'pitch'``, ``'roll'`` (binned logits),
            ``'t'`` (translation) and ``'exp'`` (expression offsets). Optional
            ``'yaw_in'``, ``'pitch_in'``, ``'roll_in'`` entries replace the
            corresponding decoded angle.
        wo_exp: when truthy, the expression offsets are zeroed out.

    Returns:
        ``{'value': transformed_keypoints}``.
    """
    kp = kp_canonical['value']

    # An explicit override wins; only decode the logits when none is given.
    yaw = he['yaw_in'] if 'yaw_in' in he else headpose_pred_to_degree(he['yaw'])
    pitch = he['pitch_in'] if 'pitch_in' in he else headpose_pred_to_degree(he['pitch'])
    roll = he['roll_in'] if 'roll_in' in he else headpose_pred_to_degree(he['roll'])

    rot_mat = get_rotation_matrix(yaw, pitch, roll)

    exp = he['exp']
    if wo_exp:
        exp = exp * 0

    # Rotate every keypoint: out[b, k, m] = sum_p rot[b, m, p] * kp[b, k, p].
    kp_rotated = torch.einsum('bmp,bkp->bkm', rot_mat, kp)

    # Keep only component 1 of the translation. Work on a copy so the caller's
    # he['t'] tensor is not modified in place.
    t = he['t'].clone()
    t[:, 0] = 0
    t[:, 2] = 0
    t = t.unsqueeze(1).repeat(1, kp.shape[1], 1)
    kp_t = kp_rotated + t

    # Expression offsets are stored flat; reshape to one 3-vector per keypoint.
    exp = exp.view(exp.shape[0], -1, 3)
    kp_transformed = kp_t + exp

    return {'value': kp_transformed}
|
|
|
|
|
|
def make_animation(source_image, source_semantics, target_semantics,
                   generator, kp_detector, he_estimator, mapping,
                   yaw_c_seq=None, pitch_c_seq=None, roll_c_seq=None,
                   use_exp=True, use_half=False):
    """Render one predicted frame per step of ``target_semantics``.

    Args:
        source_image: input passed to ``kp_detector`` and ``generator``.
        source_semantics: input passed to ``mapping`` for the source pose.
        target_semantics: per-frame driving input; frames are indexed along
            dimension 1.
        generator: callable invoked as
            ``generator(source_image, kp_source=..., kp_driving=...)`` and
            returning a dict with a ``'prediction'`` entry.
        kp_detector: callable returning the canonical keypoints dict.
        he_estimator: accepted for interface compatibility; not used here.
        mapping: callable returning the head-pose/expression dict.
        yaw_c_seq, pitch_c_seq, roll_c_seq: optional per-frame angle overrides,
            indexed as ``seq[:, frame_idx]``.
        use_exp: accepted for interface compatibility; not used here.
        use_half: accepted for interface compatibility; not used here.

    Returns:
        The per-frame ``'prediction'`` tensors stacked along dimension 1.
    """
    pose_overrides = (
        ('yaw_in', yaw_c_seq),
        ('pitch_in', pitch_c_seq),
        ('roll_in', roll_c_seq),
    )

    with torch.no_grad():
        # Source-side quantities do not depend on the frame; compute them once.
        kp_canonical = kp_detector(source_image)
        he_source = mapping(source_semantics)
        kp_source = keypoint_transformation(kp_canonical, he_source)

        predictions = []
        for frame_idx in tqdm(range(target_semantics.shape[1]), 'Face Renderer:'):
            he_driving = mapping(target_semantics[:, frame_idx])
            for key, seq in pose_overrides:
                if seq is not None:
                    he_driving[key] = seq[:, frame_idx]

            kp_driving = keypoint_transformation(kp_canonical, he_driving)
            out = generator(source_image, kp_source=kp_source, kp_driving=kp_driving)
            predictions.append(out['prediction'])

        return torch.stack(predictions, dim=1)
|
|
class AnimateModel(torch.nn.Module):
    """
    Merge all generator related updates into single model for better multi-gpu usage
    """

    def __init__(self, generator, kp_extractor, mapping):
        """Store the three sub-modules and switch each of them to eval mode."""
        super().__init__()
        self.kp_extractor = kp_extractor
        self.generator = generator
        self.mapping = mapping

        self.kp_extractor.eval()
        self.generator.eval()
        self.mapping.eval()

    def forward(self, x):
        """Run ``make_animation`` on a batch dict and return its result.

        Args:
            x: mapping with the keys ``'source_image'``, ``'source_semantics'``,
                ``'target_semantics'``, ``'yaw_c_seq'``, ``'pitch_c_seq'`` and
                ``'roll_c_seq'``.

        Returns:
            Whatever ``make_animation`` returns for these inputs.
        """
        source_image = x['source_image']
        source_semantics = x['source_semantics']
        target_semantics = x['target_semantics']
        yaw_c_seq = x['yaw_c_seq']
        pitch_c_seq = x['pitch_c_seq']
        roll_c_seq = x['roll_c_seq']

        # Pass the models by keyword. make_animation takes `he_estimator`
        # before `mapping`; passing self.mapping positionally filled the
        # `he_estimator` slot and left the required `mapping` argument missing.
        # This model holds no head-pose estimator, so None is passed for it.
        predictions_video = make_animation(
            source_image, source_semantics, target_semantics,
            generator=self.generator,
            kp_detector=self.kp_extractor,
            he_estimator=None,
            mapping=self.mapping,
            use_exp=True,
            yaw_c_seq=yaw_c_seq, pitch_c_seq=pitch_c_seq, roll_c_seq=roll_c_seq,
        )

        return predictions_video