| |
|
|
| import numpy as np |
| import copy |
| from ..retarget_pose import get_retarget_pose |
|
|
# Row indices into a 68-point face landmark array; the face helpers in this
# file index (68, 2) arrays with them.
# NOTE(review): the values look like the common 68-point (dlib/iBUG style)
# landmark layout -- confirm against the face detector that produces them.
L_EYE_IDXS = list(range(36, 42))  # rows averaged into one eye centre
R_EYE_IDXS = list(range(42, 48))  # rows averaged into the other eye centre
NOSE_TIP = 30  # row used as the nose anchor when faces are translated
MOUTH_L = 48  # one mouth-corner row used as an anchor
MOUTH_R = 54  # the other mouth-corner row used as an anchor
JAW_LINE = list(range(0, 17))  # not referenced by the code visible in this file
|
|
|
|
| |
def aaposemeta_to_dwpose(meta):
    """Repack a pose-meta dict into the dwpose-style dict used in this module.

    ``meta`` must provide the arrays ``keypoints_body``,
    ``keypoints_right_hand``, ``keypoints_left_hand`` and ``keypoints_face``;
    their columns are read as ``(x, y, score)``.  The last two body rows and
    the first face row are dropped.

    Returns a dict with ``bodies`` (``candidate``, ``subset``, ``score``),
    ``hands`` / ``hands_score`` (right hand first, then left) and
    ``faces`` / ``faces_score`` (with a leading axis of length 1).
    """
    body = meta['keypoints_body'][:-2]
    body_xy = body[:, :2]
    body_conf = body[:, 2]

    # One slot per body keypoint; -1 marks keypoints without a positive score.
    slots = np.arange(len(body_xy), dtype=float)
    slots[body_conf <= 0] = -1

    right_hand = meta['keypoints_right_hand']
    left_hand = meta['keypoints_left_hand']
    face = meta['keypoints_face'][1:]

    return {
        "bodies": {
            "candidate": body_xy,
            "subset": slots[np.newaxis, ...],
            "score": body_conf[np.newaxis, ...],
        },
        "hands": np.stack([right_hand[:, :2], left_hand[:, :2]]),
        "hands_score": np.stack([right_hand[:, 2], left_hand[:, 2]]),
        "faces": face[:, :2][np.newaxis, ...],
        "faces_score": face[:, 2][np.newaxis, ...],
    }
|
|
def aaposemeta_obj_to_dwpose(pose_meta):
    """
    Build a dwpose-style dict from an AAPoseMeta-like object.

    Coordinates in ``kps_body`` / ``kps_rhand`` / ``kps_lhand`` are divided
    by ``(width, height)`` to make them relative.  An array that is ``None``
    is replaced by zeros, and NaN entries are set to 0.  Faces are not
    converted: ``faces`` and ``faces_score`` are returned as ``None``.

    The score attributes (``kps_body_p``, ``kps_rhand_p``, ``kps_lhand_p``)
    are dereferenced unconditionally to size the zero fallback, so they must
    be arrays even when the matching coordinates are ``None``.
    """

    def _dense(values, fallback_shape):
        # Missing input -> zeros of the requested shape; otherwise a float32
        # copy with NaNs replaced by 0.
        if values is None:
            return np.zeros(fallback_shape, dtype=np.float32)
        return np.nan_to_num(np.array(values, dtype=np.float32), nan=0.0)

    frame_size = np.array([pose_meta.width, pose_meta.height])

    body_px = _dense(pose_meta.kps_body, (pose_meta.kps_body_p.shape[0], 2))
    body_rel = body_px / frame_size
    body_conf = _dense(pose_meta.kps_body_p, (body_rel.shape[0],))

    # One slot per body keypoint; -1 marks keypoints without a positive score.
    slots = np.arange(len(body_rel), dtype=float)
    slots[body_conf <= 0] = -1

    rhand_px = _dense(pose_meta.kps_rhand, (pose_meta.kps_rhand_p.shape[0], 2))
    lhand_px = _dense(pose_meta.kps_lhand, (pose_meta.kps_lhand_p.shape[0], 2))
    hand_xy = np.stack([rhand_px / frame_size, lhand_px / frame_size])
    hand_conf = np.stack([
        _dense(pose_meta.kps_rhand_p, (rhand_px.shape[0],)),
        _dense(pose_meta.kps_lhand_p, (lhand_px.shape[0],)),
    ])

    return {
        "bodies": {
            "candidate": body_rel,
            "subset": np.expand_dims(slots, axis=0),
            "score": np.expand_dims(body_conf, axis=0),
        },
        "hands": hand_xy,
        "hands_score": hand_conf,
        "faces": None,
        "faces_score": None,
    }
|
|
| |
|
|
def _to_68x2(arr):
    """Bring a face-landmark array into ``(68, 2)`` float64 form.

    Accepted input shapes are ``(1, 68, 2)``, ``(68, 2)`` and ``(2, 68)``.

    Returns:
        ``(face68, to_orig)`` where ``face68`` is a float64 ``(68, 2)`` array
        and ``to_orig`` converts a ``(68, 2)`` array back into the layout
        ``arr`` came in (as float64).

    Raises:
        ValueError: if ``arr`` has any other shape.  ``to_orig`` also raises
            ``ValueError`` when its argument is not ``(68, 2)``.
    """

    def _restorer(relayout):
        # Build the inverse mapping; only the final re-layout step differs
        # between the supported input shapes.
        def to_orig(x):
            x = np.asarray(x, dtype=np.float64)
            if x.shape != (68, 2):
                raise ValueError("to_orig expects (68,2)")
            return relayout(x)
        return to_orig

    layout = arr.shape
    if layout == (1, 68, 2):
        return arr[0].astype(np.float64), _restorer(lambda pts: pts[np.newaxis, :, :])
    if layout == (68, 2):
        return arr.astype(np.float64), _restorer(lambda pts: pts)
    if layout == (2, 68):
        return arr.T.astype(np.float64), _restorer(lambda pts: pts.T)
    raise ValueError(f"faces shape {arr.shape} not supported; expected (1,68,2) or (68,2) or (2,68)")
|
|
def _eye_center(face68, idxs):
    """Return the mean of the rows of ``face68`` selected by ``idxs``."""
    selected = face68[idxs]
    return selected.mean(axis=0)
|
|
def _anchors(face68):
    """Return five anchor points of a ``(68, 2)`` face as a ``(5, 2)`` array.

    Rows are: the two eye centres (ordered so the one with the smaller x
    comes first), the ``NOSE_TIP`` landmark, then the ``MOUTH_L`` and
    ``MOUTH_R`` landmarks.
    """
    eye_a = _eye_center(face68, L_EYE_IDXS)
    eye_b = _eye_center(face68, R_EYE_IDXS)
    # Order the eyes by x so the result does not depend on which eye the
    # index lists label as left or right.
    first_eye, second_eye = (eye_b, eye_a) if eye_b[0] < eye_a[0] else (eye_a, eye_b)
    return np.stack(
        [first_eye, second_eye, face68[NOSE_TIP], face68[MOUTH_L], face68[MOUTH_R]],
        axis=0,
    )
|
|
def _face_scale_only(src68, ref68, target_nose_pos, alpha=1.0, anchor_pairs=((36, 45), (27, 8))):
    """
    Rough alignment: stretch the source face to the reference proportions and
    move its nose tip onto ``target_nose_pos``.

    Args:
        src68: source landmarks, convertible to a ``(68, 2)`` array.
        ref68: reference landmarks, convertible to a ``(68, 2)`` array.
        target_nose_pos: ``(x, y)`` position the ``NOSE_TIP`` landmark is
            moved to.
        alpha: blend factor; 0 keeps the source proportions, 1 applies the
            full reference/source scale.
        anchor_pairs: two landmark index pairs.  The distance between the
            first pair gives the x scale factor, the distance between the
            second pair gives the y scale factor.  Both are full Euclidean
            distances, not per-axis extents.

    Returns:
        A new float64 ``(68, 2)`` array; the inputs are not modified.
    """
    src = np.asarray(src68, dtype=np.float64)
    ref = np.asarray(ref68, dtype=np.float64)

    def _span(points, pair):
        # Distance between the two landmarks of one anchor pair.
        return np.linalg.norm(points[pair[0]] - points[pair[1]])

    src_w, ref_w = _span(src, anchor_pairs[0]), _span(ref, anchor_pairs[0])
    src_h, ref_h = _span(src, anchor_pairs[1]), _span(ref, anchor_pairs[1])

    # A degenerate source span would blow the ratio up; keep the size instead.
    scale_x = ref_w / src_w if src_w > 1e-6 else 1.0
    scale_y = ref_h / src_h if src_h > 1e-6 else 1.0

    # Scale about the centroid of the anchor points so the face grows in place.
    center = _anchors(src).mean(axis=0)
    scaled = src - center
    scaled[:, 0] *= (1 - alpha) + scale_x * alpha
    scaled[:, 1] *= (1 - alpha) + scale_y * alpha
    scaled = scaled + center

    # Finally translate so the nose tip sits exactly on the target.
    offset = target_nose_pos - scaled[NOSE_TIP]
    scaled += offset
    return scaled
|
|
| |
|
|
|
|
def scale_and_translate_pose(tgt_pose, ref_pose, conf_th=0.9, return_ratio=False):
    """Uniformly scale and shift ``tgt_pose`` so it lines up with ``ref_pose``.

    The scale ratio (reference size / target size) is picked from the first
    usable measurement; a keypoint pair is usable when both of its scores
    reach ``conf_th`` in both poses:

    1. body keypoints 2/5 and 8/11 usable (named shoulder and hip in the
       code): mean of the 2-5 x-extent ratio and the y-extent ratio between
       the 2/5 and 8/11 midpoints;
    2. only 2/5 usable: ratio of the 2-5 distances;
    3. otherwise: ratio of the 16-17 distances (named ear in the code).

    A target measurement that is not above 1e-6 yields a ratio of 1.

    Body candidates, ``faces`` and ``hands`` of a deep copy of ``tgt_pose``
    are scaled about body keypoint 1 and then translated so that keypoint 1
    coincides with the reference's keypoint 1.

    Returns:
        ``scale_ratio`` alone when ``return_ratio`` is true, otherwise the
        tuple ``(aligned_pose, shoulder_ok, hip_ok)``.
    """
    eps = 1e-6
    anchor_idx = 1

    aligned_pose = copy.deepcopy(tgt_pose)
    ref_kpt = ref_pose['bodies']['candidate'].astype(np.float32)
    tgt_kpt = aligned_pose['bodies']['candidate'].astype(np.float32)

    # Missing scores count as fully confident.
    ref_sc = ref_pose['bodies'].get('score', np.ones(ref_kpt.shape[0])).astype(np.float32).reshape(-1)
    tgt_sc = tgt_pose['bodies'].get('score', np.ones(tgt_kpt.shape[0])).astype(np.float32).reshape(-1)

    def pair_confident(scores, first, second):
        return (scores[first] >= conf_th) and (scores[second] >= conf_th)

    ref_shoulder_valid = pair_confident(ref_sc, 2, 5)
    tgt_shoulder_valid = pair_confident(tgt_sc, 2, 5)
    shoulder_ok = ref_shoulder_valid and tgt_shoulder_valid

    ref_hip_valid = pair_confident(ref_sc, 8, 11)
    tgt_hip_valid = pair_confident(tgt_sc, 8, 11)
    hip_ok = ref_hip_valid and tgt_hip_valid

    if shoulder_ok and hip_ok:
        ref_width = abs(ref_kpt[5, 0] - ref_kpt[2, 0])
        tgt_width = abs(tgt_kpt[5, 0] - tgt_kpt[2, 0])
        x_ratio = ref_width / tgt_width if tgt_width > eps else 1.0

        ref_height = abs(np.mean(ref_kpt[[8, 11], 1]) - np.mean(ref_kpt[[2, 5], 1]))
        tgt_height = abs(np.mean(tgt_kpt[[8, 11], 1]) - np.mean(tgt_kpt[[2, 5], 1]))
        y_ratio = ref_height / tgt_height if tgt_height > eps else 1.0

        scale_ratio = (x_ratio + y_ratio) / 2
    else:
        # Fall back to a single distance: 2-5 when usable, 16-17 otherwise.
        first, second = (2, 5) if shoulder_ok else (16, 17)
        ref_dist = np.linalg.norm(ref_kpt[first] - ref_kpt[second])
        tgt_dist = np.linalg.norm(tgt_kpt[first] - tgt_kpt[second])
        scale_ratio = ref_dist / tgt_dist if tgt_dist > eps else 1.0

    if return_ratio:
        return scale_ratio

    # Everything that has to move with the body; absent or empty parts are skipped.
    parts = [
        part
        for part in (tgt_kpt, aligned_pose.get('faces'), aligned_pose.get('hands'))
        if part is not None and part.size > 0
    ]

    # Scale in place about the anchor position taken before any scaling.
    pivot = tgt_kpt[anchor_idx].copy()
    for part in parts:
        part[..., 0] = pivot[0] + (part[..., 0] - pivot[0]) * scale_ratio
        part[..., 1] = pivot[1] + (part[..., 1] - pivot[1]) * scale_ratio

    # Then shift so the anchor lands on the reference anchor.
    offset = ref_kpt[anchor_idx] - tgt_kpt[anchor_idx]
    for part in parts:
        part += offset

    aligned_pose['bodies']['candidate'] = tgt_kpt
    return aligned_pose, shoulder_ok, hip_ok
|
|
| |
|
|
def align_to_reference(ref_pose_meta, tpl_pose_metas, tpl_dwposes, anchor_idx=None):
    """Retarget template poses onto a reference and fit the template faces.

    The template pose metas are retargeted with ``get_retarget_pose`` using
    the frame at ``anchor_idx`` as the anchor, converted to dwpose dicts,
    and then each frame's template face is rescaled to the reference face
    proportions and pinned to the retargeted body keypoint 0.

    Args:
        ref_pose_meta: reference pose-meta dict (the keys read are those of
            ``aaposemeta_to_dwpose``); also passed to ``get_retarget_pose``.
        tpl_pose_metas: sequence of template pose metas.
        tpl_dwposes: per-frame template dwpose dicts; only ``faces`` and
            ``faces_score`` are read.  Frames are paired positionally with
            the retargeted poses.
        anchor_idx: index into ``tpl_pose_metas`` of the anchor frame.
            ``None`` (the default) selects frame 0; previously ``None`` was
            used directly as the index.

    Returns:
        A list of dwpose dicts, one per retargeted pose meta.
    """
    ref_pose_dw = aaposemeta_to_dwpose(ref_pose_meta)

    # The signature advertises anchor_idx as optional, so give None a
    # well-defined meaning instead of indexing with it.
    best_idx = 0 if anchor_idx is None else anchor_idx
    tpl_pose_meta_best = tpl_pose_metas[best_idx]

    # NOTE(review): the two trailing None arguments are passed through as in
    # the original call; their meaning is defined by get_retarget_pose.
    tpl_retarget_pose_metas = get_retarget_pose(
        tpl_pose_meta_best,
        ref_pose_meta,
        tpl_pose_metas,
        None, None
    )

    retarget_dwposes = [aaposemeta_obj_to_dwpose(pm) for pm in tpl_retarget_pose_metas]

    if ref_pose_dw['faces'] is not None:
        ref68, _ = _to_68x2(ref_pose_dw['faces'])
        for tpl_dw, rt_dw in zip(tpl_dwposes, retarget_dwposes):
            if tpl_dw['faces'] is None:
                continue
            src68, to_orig = _to_68x2(tpl_dw['faces'])
            # Body keypoint 0 of the retargeted pose is where the face's
            # nose tip is placed.
            target_nose_pos = rt_dw['bodies']['candidate'][0]
            scaled68 = _face_scale_only(src68, ref68, target_nose_pos, alpha=1.0)
            rt_dw['faces'] = to_orig(scaled68)
            rt_dw['faces_score'] = tpl_dw['faces_score']

    return retarget_dwposes
|
|
| |
|
|
|
|
def compute_ratios_stepwise(ref_scores, source_scores, ref_pts, src_pts, conf_th=0.9, th=1e-6):
    """Compute per-bone length ratios (reference / source) in stages.

    Every bone starts at ratio 1.0.  The bones are measured in stages; a
    stage only runs when all keypoints it requires have a score of at least
    ``conf_th`` in both ``ref_scores`` and ``source_scores``.  Some stages
    also copy a parent bone's ratio onto its child bone, which a later stage
    may overwrite with a measured value.  Finally mirrored bone pairs are
    replaced by the mean of their two ratios.

    Args:
        ref_scores, source_scores: score arrays indexed as ``[0, keypoint]``.
        ref_pts, src_pts: keypoint coordinate arrays indexed by keypoint.
        conf_th: minimum score for a keypoint to count as usable.
        th: a source bone length not above this yields a ratio of 1.0.

    Returns:
        dict mapping each ``(keypoint_a, keypoint_b)`` bone to its ratio.
    """
    bones = [
        (0, 1), (1, 2), (1, 5), (2, 3), (3, 4), (5, 6), (6, 7),
        (0, 14), (0, 15), (14, 16), (15, 17),
        (8, 9), (9, 10), (11, 12), (12, 13),
        (1, 8), (1, 11),
    ]
    ratios = dict.fromkeys(bones, 1.0)

    def usable(idx):
        return ref_scores[0, idx] >= conf_th and source_scores[0, idx] >= conf_th

    def measure(a, b):
        ref_len = np.linalg.norm(ref_pts[a] - ref_pts[b])
        src_len = np.linalg.norm(src_pts[a] - src_pts[b])
        return ref_len / src_len if src_len > th else 1.0

    head = [0, 1, 14, 15, 16, 17]
    upper = [0, 1, 2, 5, 14, 15, 16, 17]
    # (required keypoints, bones measured, child bone -> parent bone copies)
    stages = [
        (head, [(0, 1), (0, 14), (0, 15), (14, 16), (15, 17)], {}),
        (upper, [(1, 2), (1, 5)], {}),
        (upper + [3, 6], [(2, 3), (5, 6)], {(3, 4): (2, 3), (6, 7): (5, 6)}),
        (upper + [3, 6, 8, 11], [(1, 8), (1, 11)], {}),
        (upper + [3, 6, 8, 11, 4, 7], [(3, 4), (6, 7)], {}),
        (upper + [3, 6, 8, 11, 4, 7, 9, 12], [(8, 9), (11, 12)],
         {(9, 10): (8, 9), (12, 13): (11, 12)}),
        (range(18), bones, {}),
    ]
    for required, measured, inherited in stages:
        if not all(usable(i) for i in required):
            continue
        for bone in measured:
            ratios[bone] = measure(*bone)
        # Copies run after the measurements so they pick up the fresh value.
        for child, parent in inherited.items():
            ratios[child] = ratios[parent]

    symmetric_pairs = [
        ((1, 2), (1, 5)),
        ((2, 3), (5, 6)),
        ((3, 4), (6, 7)),
        ((8, 9), (11, 12)),
        ((9, 10), (12, 13)),
    ]
    # NOTE(review): (13, 15) is not one of the bones above, so this entry
    # never matches and the eye/ear ratios are left un-averaged.  The keys
    # look mistyped -- confirm the intended pair before changing them.
    eye_pairs = [
        ((13, 15), (14, 16)),
    ]
    for first_key, second_key in symmetric_pairs + eye_pairs:
        first_val = ratios.get(first_key)
        second_val = ratios.get(second_key)
        if first_val is None or second_val is None:
            continue
        mean_val = (first_val + second_val) / 2.0
        ratios[first_key] = mean_val
        ratios[second_key] = mean_val

    return ratios
|
|
# Bone rescaling steps as (ratio key, parent, child, body rows shifted,
# hand row shifted or None).  The order matters: later steps read positions
# that earlier steps have already moved.
_ALIGN_BONE_STEPS = (
    ((0, 1), 1, 0, [0, 14, 15, 16, 17], None),
    ((1, 2), 1, 2, [2, 3, 4], 1),
    ((1, 5), 1, 5, [5, 6, 7], 0),
    ((2, 3), 2, 3, [3, 4], 1),
    ((3, 4), 3, 4, [4], 1),
    ((5, 6), 5, 6, [6, 7], 0),
    ((6, 7), 6, 7, [7], 0),
    ((0, 14), 0, 14, [14], None),
    ((0, 15), 0, 15, [15], None),
    ((14, 16), 14, 16, [16], None),
    ((15, 17), 15, 17, [17], None),
    ((1, 8), 1, 8, [8], None),
    ((1, 11), 1, 11, [11], None),
    ((8, 9), 8, 9, [9, 10], None),
    ((9, 10), 9, 10, [10], None),
    ((11, 12), 11, 12, [12, 13], None),
    ((12, 13), 12, 13, [13], None),
)


def _apply_bone_ratios(candidate, hands, ratios):
    """Rescale the bones of ``candidate`` (and move ``hands``) in place."""
    for key, parent, child, moved, hand_row in _ALIGN_BONE_STEPS:
        shrink = 1. - ratios[key]
        # Shift that makes the parent->child bone ``ratios[key]`` times as
        # long; both components are computed before anything is moved.
        x_offset = (candidate[parent][0] - candidate[child][0]) * shrink
        y_offset = (candidate[parent][1] - candidate[child][1]) * shrink
        candidate[moved, 0] += x_offset
        candidate[moved, 1] += y_offset
        if hand_row is not None and hands is not None:
            hands[hand_row, :, 0] += x_offset
            hands[hand_row, :, 1] += y_offset


def align_to_pose(ref_dwpose, tpl_dwposes, anchor_idx=None, conf_th=0.9):
    """Reshape template poses so their bone lengths match a reference pose.

    The reference is first scaled and translated onto the anchor template
    frame, per-bone length ratios between the two are computed once, and
    those ratios are then applied to a deep copy of every template frame.
    When the reference has a face, each frame's face is rescaled to the
    reference proportions and pinned to the frame's body keypoint 0.

    Args:
        ref_dwpose: reference dwpose dict.
        tpl_dwposes: sequence of template dwpose dicts; not modified.
        anchor_idx: index of the template frame the ratios are measured
            against.  ``None`` (the default) selects frame 0; previously
            ``None`` was used directly as the index.
        conf_th: minimum keypoint score for a measurement to be used.

    Returns:
        A list of adjusted copies of ``tpl_dwposes``.  A frame whose
        ``hands`` entry is missing or ``None`` keeps it that way; only its
        body (and face) is adjusted.
    """
    detected_poses = copy.deepcopy(tpl_dwposes)

    best_pose = tpl_dwposes[0 if anchor_idx is None else anchor_idx]
    ref_pose_scaled, _, _ = scale_and_translate_pose(ref_dwpose, best_pose, conf_th=conf_th)

    ref_candidate = ref_pose_scaled['bodies']['candidate'].astype(np.float32)
    ref_scores = ref_pose_scaled['bodies']['score'].astype(np.float32)
    source_candidate = best_pose['bodies']['candidate'].astype(np.float32)
    source_scores = best_pose['bodies']['score'].astype(np.float32)

    # Face alignment is best-effort: an unusable reference face disables it.
    ref68 = None
    ref_faces = ref_pose_scaled.get('faces')
    if ref_faces is not None and ref_faces.size > 0:
        try:
            ref68, _ = _to_68x2(ref_faces)
        except Exception as e:
            print("Reference face conversion failed:", e)

    ratios = compute_ratios_stepwise(
        ref_scores, source_scores, ref_candidate, source_candidate,
        conf_th=conf_th, th=1e-6,
    )

    for pose in detected_poses:
        candidate = pose['bodies']['candidate']
        _apply_bone_ratios(candidate, pose.get('hands'), ratios)

        faces = pose.get('faces')
        if ref68 is None or faces is None or faces.size == 0:
            continue
        try:
            src68, to_orig = _to_68x2(faces)
            scaled68 = _face_scale_only(src68, ref68, candidate[0], alpha=1.0)
            pose['faces'] = to_orig(scaled68)
        except Exception as e:
            # Keep the frame's original face rather than failing the batch.
            print("Template face alignment failed:", e)

    return detected_poses
|
|