# Eji-Sensei14's picture
# Upload folder using huggingface_hub
# edd3cd4 verified
# https://github.com/ssj9596/One-to-All-Animation
import numpy as np
import copy
from ..retarget_pose import get_retarget_pose
# Row indices into a 68-point face landmark array (see _to_68x2), grouped by
# the facial feature each constant is named after.
L_EYE_IDXS = [36, 37, 38, 39, 40, 41]
R_EYE_IDXS = [42, 43, 44, 45, 46, 47]
NOSE_TIP = 30
MOUTH_L, MOUTH_R = 48, 54
JAW_LINE = [*range(17)]
# ===========================Convert wanpose format into our dwpose-like format======================
def aaposemeta_to_dwpose(meta):
    """Repackage a keypoint mapping into the dwpose-like nested layout.

    Args:
        meta: Mapping holding the arrays ``'keypoints_body'``,
            ``'keypoints_right_hand'``, ``'keypoints_left_hand'`` and
            ``'keypoints_face'``. Columns 0-1 of every array are used as
            coordinates and column 2 as the score.

    Returns:
        dict with the keys ``'bodies'`` (itself a dict with ``candidate``,
        ``subset`` and ``score``), ``'hands'``, ``'hands_score'``,
        ``'faces'`` and ``'faces_score'``. The last two body rows and the
        first face row are left out. Hands are stacked right first, then
        left.
    """
    body = meta['keypoints_body'][:-2]
    body_xy = body[:, :2]
    body_conf = body[:, 2]

    # subset[i] is the joint index itself, or -1 when its score is <= 0.
    joint_ids = np.arange(len(body_xy), dtype=float)
    joint_ids[body_conf <= 0] = -1

    right_hand = meta['keypoints_right_hand']
    left_hand = meta['keypoints_left_hand']

    # Skip the first face row; the remaining rows become the face landmarks.
    face = meta['keypoints_face'][1:]

    return {
        "bodies": {
            "candidate": body_xy,
            "subset": joint_ids[np.newaxis, :],   # shape (1, N)
            "score": body_conf[np.newaxis, :],    # shape (1, N)
        },
        "hands": np.stack([right_hand[:, :2], left_hand[:, :2]]),
        "hands_score": np.stack([right_hand[:, 2], left_hand[:, 2]]),
        "faces": face[np.newaxis, :, :2],
        "faces_score": face[np.newaxis, :, 2],
    }
def aaposemeta_obj_to_dwpose(pose_meta):
    """
    Convert an AAPoseMeta-like object into a dwpose-like data structure.

    Coordinates are restored to relative coordinates (divided by width,
    height). A missing (``None``) coordinate or score array is replaced by
    zeros, and NaN entries are replaced by 0.

    Args:
        pose_meta: Object exposing ``width``, ``height``, ``kps_body``,
            ``kps_body_p``, ``kps_rhand``, ``kps_rhand_p``, ``kps_lhand``
            and ``kps_lhand_p``.

    Returns:
        dict with ``'bodies'`` (``candidate``, ``subset``, ``score``),
        ``'hands'`` and ``'hands_score'`` (right hand first, then left);
        ``'faces'`` and ``'faces_score'`` are always ``None``.

    Raises:
        ValueError: If both the coordinates and the scores of one part are
            ``None``, so the number of keypoints cannot be determined.
    """
    frame_size = np.array([pose_meta.width, pose_meta.height])

    def clean(arr):
        # float32 copy with NaN replaced by 0
        return np.nan_to_num(np.array(arr, dtype=np.float32), nan=0.0)

    def coords_and_scores(kps, kps_p, part):
        # The zero-fill length is taken from whichever array is present.
        # Resolving it lazily is what lets a missing score array fall back
        # to zeros instead of failing on ``None.shape``.
        if kps is None and kps_p is None:
            raise ValueError(
                f"cannot infer keypoint count for {part}: "
                "coordinates and scores are both None"
            )
        if kps is None:
            scores = clean(kps_p)
            coords = np.zeros((scores.shape[0], 2), dtype=np.float32)
        else:
            coords = clean(kps)
            if kps_p is None:
                scores = np.zeros((coords.shape[0],), dtype=np.float32)
            else:
                scores = clean(kps_p)
        return coords / frame_size, scores

    # body
    candidate_body, score_body = coords_and_scores(
        pose_meta.kps_body, pose_meta.kps_body_p, "body")
    # subset[i] is the joint index itself, or -1 when its score is <= 0.
    subset_body = np.arange(len(candidate_body), dtype=float)
    subset_body[score_body <= 0] = -1
    bodies = {
        "candidate": candidate_body,
        "subset": np.expand_dims(subset_body, axis=0),
        "score": np.expand_dims(score_body, axis=0)
    }

    # hands
    rhand_xy, rhand_score = coords_and_scores(
        pose_meta.kps_rhand, pose_meta.kps_rhand_p, "right hand")
    lhand_xy, lhand_score = coords_and_scores(
        pose_meta.kps_lhand, pose_meta.kps_lhand_p, "left hand")

    return {
        "bodies": bodies,
        "hands": np.stack([rhand_xy, lhand_xy]),
        "hands_score": np.stack([rhand_score, lhand_score]),
        "faces": None,
        "faces_score": None
    }
# ===============================Face Rough alignment======================
def _to_68x2(arr):
if arr.shape == (1, 68, 2):
def to_orig(x):
x = np.asarray(x, dtype=np.float64)
if x.shape != (68, 2):
raise ValueError("to_orig expects (68,2)")
return x[np.newaxis, :, :]
return arr[0].astype(np.float64), to_orig
if arr.shape == (68, 2):
def to_orig(x):
x = np.asarray(x, dtype=np.float64)
if x.shape != (68, 2):
raise ValueError("to_orig expects (68,2)")
return x
return arr.astype(np.float64), to_orig
if arr.shape == (2, 68):
def to_orig(x):
x = np.asarray(x, dtype=np.float64)
if x.shape != (68, 2):
raise ValueError("to_orig expects (68,2)")
return x.T
return arr.T.astype(np.float64), to_orig
raise ValueError(f"faces shape {arr.shape} not supported; expected (1,68,2) or (68,2) or (2,68)")
def _eye_center(face68, idxs):
return face68[idxs].mean(axis=0)
def _anchors(face68):
    """Return five anchor points of a 68-point face as a (5, 2) stack.

    Rows are: the two eye centres (the one with the smaller x first), the
    nose tip, and the two mouth corners at ``MOUTH_L`` and ``MOUTH_R``.
    """
    first_eye = _eye_center(face68, L_EYE_IDXS)
    second_eye = _eye_center(face68, R_EYE_IDXS)
    # Keep the eye with the smaller x coordinate in the first slot.
    if second_eye[0] < first_eye[0]:
        first_eye, second_eye = second_eye, first_eye
    anchor_rows = [
        first_eye,
        second_eye,
        face68[NOSE_TIP],
        face68[MOUTH_L],
        face68[MOUTH_R],
    ]
    return np.stack(anchor_rows, axis=0)
def _face_scale_only(src68, ref68, target_nose_pos, alpha=1.0, anchor_pairs=((36, 45), (27, 8))):
    """
    Rough alignment - adjust the shape of the source face according to the
    proportions of the reference, and align the nose tip to target_nose_pos.

    Args:
        src68: Source face landmarks, indexable as a (68, 2) array.
        ref68: Reference face landmarks with the same layout.
        target_nose_pos: Position the ``NOSE_TIP`` landmark must end up at.
        alpha: Blend factor for the scaling; 0 leaves the source size
            untouched, 1 applies the full reference/source ratio.
        anchor_pairs: Two landmark index pairs whose distances define the
            face size:
            - anchor_pairs[0] ([36, 45] by default) for x
            - anchor_pairs[1] ([27, 8] by default) for y
            The default is an immutable tuple so it cannot be altered
            between calls.

    Returns:
        A new float64 array of scaled and translated landmarks; the inputs
        are not modified.
    """
    src = np.asarray(src68, dtype=np.float64)
    ref = np.asarray(ref68, dtype=np.float64)
    width_pair, height_pair = anchor_pairs[0], anchor_pairs[1]

    def span(points, pair):
        # Euclidean distance between the two landmarks of a pair.
        return np.linalg.norm(points[pair[0]] - points[pair[1]])

    src_w, ref_w = span(src, width_pair), span(ref, width_pair)
    src_h, ref_h = span(src, height_pair), span(ref, height_pair)
    # A degenerate source span falls back to "no scaling" on that axis.
    scale_x = ref_w / src_w if src_w > 1e-6 else 1.0
    scale_y = ref_h / src_h if src_h > 1e-6 else 1.0

    # Scale around the mean of the anchor points so the face stays in place.
    center = _anchors(src).mean(axis=0)
    scaled = src - center
    scaled[:, 0] *= (1 - alpha) + scale_x * alpha
    scaled[:, 1] *= (1 - alpha) + scale_y * alpha
    scaled += center

    # Translate so that the nose tip lands exactly on the requested target.
    scaled += target_nose_pos - scaled[NOSE_TIP]
    return scaled
# ===============================Reference Img Pre-Process======================
def scale_and_translate_pose(tgt_pose, ref_pose, conf_th=0.9, return_ratio=False):
    """Uniformly rescale ``tgt_pose`` to the size of ``ref_pose`` and move it onto it.

    The scale ratio (reference / target) is picked by confidence:
      * shoulders (joints 2, 5) and hips (joints 8, 11) confident in both
        poses: mean of the shoulder-width ratio (x only) and the
        torso-height ratio (y only);
      * only shoulders confident: ratio of shoulder distances;
      * otherwise: ratio of the distances between joints 16 and 17 (ears).
    A target length not larger than 1e-6 yields a ratio of 1.0.

    Scaling is done around joint 1 of the target; afterwards everything is
    shifted so joint 1 coincides with joint 1 of the reference. Body
    candidates, ``'faces'`` and ``'hands'`` (when present and non-empty)
    are all transformed. ``tgt_pose`` itself is not modified.

    Args:
        tgt_pose: dwpose-like dict to transform.
        ref_pose: dwpose-like dict supplying the target size and position.
        conf_th: Minimum score for a joint to count as confident.
        return_ratio: If true, return only the scale ratio.

    Returns:
        The scale ratio when ``return_ratio`` is true, otherwise the tuple
        ``(aligned_pose, shoulder_ok, hip_ok)``.
    """
    eps = 1e-6
    result = copy.deepcopy(tgt_pose)

    ref_xy = ref_pose['bodies']['candidate'].astype(np.float32)
    tgt_xy = result['bodies']['candidate'].astype(np.float32)
    # Missing scores are treated as fully confident.
    ref_conf = ref_pose['bodies'].get('score', np.ones(ref_xy.shape[0])).astype(np.float32).reshape(-1)
    tgt_conf = tgt_pose['bodies'].get('score', np.ones(tgt_xy.shape[0])).astype(np.float32).reshape(-1)

    def both_confident(i, j):
        # Joints i and j must pass the threshold in reference AND target.
        ref_valid = (ref_conf[i] >= conf_th) and (ref_conf[j] >= conf_th)
        tgt_valid = (tgt_conf[i] >= conf_th) and (tgt_conf[j] >= conf_th)
        return ref_valid and tgt_valid

    def ratio_or_one(ref_len, tgt_len):
        return ref_len / tgt_len if tgt_len > eps else 1.0

    def distance(points, i, j):
        return np.linalg.norm(points[i] - points[j])

    def torso_height(points):
        return abs(np.mean(points[[8, 11], 1]) - np.mean(points[[2, 5], 1]))

    shoulder_ok = both_confident(2, 5)
    hip_ok = both_confident(8, 11)

    if shoulder_ok and hip_ok:
        x_ratio = ratio_or_one(abs(ref_xy[5, 0] - ref_xy[2, 0]),
                               abs(tgt_xy[5, 0] - tgt_xy[2, 0]))
        y_ratio = ratio_or_one(torso_height(ref_xy), torso_height(tgt_xy))
        scale_ratio = (x_ratio + y_ratio) / 2
    elif shoulder_ok:
        scale_ratio = ratio_or_one(distance(ref_xy, 2, 5), distance(tgt_xy, 2, 5))
    else:
        scale_ratio = ratio_or_one(distance(ref_xy, 16, 17), distance(tgt_xy, 16, 17))

    if return_ratio:
        return scale_ratio

    anchor_idx = 1
    # Copy the pivot: tgt_xy is rescaled in place right below.
    pivot = tgt_xy[anchor_idx].copy()
    parts = (tgt_xy, result.get('faces'), result.get('hands'))

    # scale every part around the pivot
    for part in parts:
        if part is None or part.size == 0:
            continue
        for axis in (0, 1):
            part[..., axis] = pivot[axis] + (part[..., axis] - pivot[axis]) * scale_ratio

    # offset: bring the (already scaled) anchor joint onto the reference one
    shift = ref_xy[anchor_idx] - tgt_xy[anchor_idx]
    for part in parts:
        if part is None or part.size == 0:
            continue
        part += shift

    result['bodies']['candidate'] = tgt_xy
    return result, shoulder_ok, hip_ok
# ===============================Align to Ref Driven Pose Retarget ======================
def align_to_reference(ref_pose_meta, tpl_pose_metas, tpl_dwposes, anchor_idx=None):
    """Retarget the template poses to the reference and roughly align the faces.

    Steps: convert the reference to the dwpose-like layout, call
    ``get_retarget_pose`` with the template frame selected by
    ``anchor_idx``, convert every returned pose meta to the dwpose-like
    layout, then rebuild each frame's face from the template face: scaled
    to the reference face proportions with its nose tip placed on body
    joint 0 of the retargeted pose.

    Args:
        ref_pose_meta: Reference pose in the mapping form accepted by
            ``aaposemeta_to_dwpose``; also passed to ``get_retarget_pose``.
        tpl_pose_metas: Template pose metas, one per frame.
        tpl_dwposes: dwpose-like dicts of the template frames; paired with
            the retargeted frames by position.
        anchor_idx: Index into ``tpl_pose_metas`` of the frame used for
            retargeting.
            NOTE(review): the default ``None`` is used directly as an
            index, which fails for a plain list - callers appear to be
            expected to always pass an index; confirm.

    Returns:
        List of dwpose-like dicts for the retargeted frames.
    """
    ref_dw = aaposemeta_to_dwpose(ref_pose_meta)
    anchor_meta = tpl_pose_metas[anchor_idx]

    # Semantics of the two trailing None arguments are defined by the
    # project helper - TODO confirm against get_retarget_pose.
    retargeted_metas = get_retarget_pose(
        anchor_meta, ref_pose_meta, tpl_pose_metas, None, None
    )
    retarget_dwposes = list(map(aaposemeta_obj_to_dwpose, retargeted_metas))

    ref_face = ref_dw['faces']
    if ref_face is None:
        return retarget_dwposes

    ref68, _ = _to_68x2(ref_face)
    for tpl_dw, rt_dw in zip(tpl_dwposes, retarget_dwposes):
        tpl_face = tpl_dw['faces']
        if tpl_face is None:
            continue
        src68, to_orig = _to_68x2(tpl_face)
        # Body joint 0 of the retargeted pose is where the nose tip goes.
        nose_target = rt_dw['bodies']['candidate'][0]
        scaled68 = _face_scale_only(src68, ref68, nose_target, alpha=1.0)
        rt_dw['faces'] = to_orig(scaled68)
        rt_dw['faces_score'] = tpl_dw['faces_score']
    return retarget_dwposes
# ===============================Rescale-Ref && Change part of pose(Option)======================
def compute_ratios_stepwise(ref_scores, source_scores, ref_pts, src_pts, conf_th=0.9, th=1e-6):
    """Compute per-bone length ratios (reference / source), as far as confidence allows.

    Every bone starts at ratio 1.0. Bones are then measured in cumulative
    stages (head, shoulders, upper arms, hips, forearms, knees); a stage is
    only applied when its joints and those of all earlier stages reach
    ``conf_th`` in both score arrays. Forearms and lower legs first inherit
    the ratio of their parent bone; the forearms are measured on their own
    once the wrists are confident. When all 18 joints are confident every
    bone is measured directly. Finally the two sides of each limb are
    averaged.

    Args:
        ref_scores: Reference joint scores, indexed as ``[0, joint]``.
        source_scores: Source joint scores, indexed as ``[0, joint]``.
        ref_pts: Reference joint coordinates, indexed by joint.
        src_pts: Source joint coordinates, indexed by joint.
        conf_th: Minimum score for a joint to count as confident.
        th: Source bone lengths not above this yield a ratio of 1.0.

    Returns:
        dict mapping each bone ``(joint_a, joint_b)`` to its ratio.
    """
    def confident(joint):
        return ref_scores[0, joint] >= conf_th and source_scores[0, joint] >= conf_th

    def bone_ratio(a, b):
        ref_len = np.linalg.norm(ref_pts[a] - ref_pts[b])
        src_len = np.linalg.norm(src_pts[a] - src_pts[b])
        return ref_len / src_len if src_len > th else 1.0

    bones = [
        (0, 1), (1, 2), (1, 5), (2, 3), (3, 4), (5, 6), (6, 7),
        (0, 14), (0, 15), (14, 16), (15, 17),
        (8, 9), (9, 10), (11, 12), (12, 13),
        (1, 8), (1, 11),
    ]
    ratios = dict.fromkeys(bones, 1.0)

    # (joints newly required, bones measured, (child, parent) bones inherited)
    # Each stage also needs every joint of the stages before it, so the walk
    # can stop at the first stage that is not fully confident.
    stages = (
        # Group 1 - head only
        ((0, 1, 14, 15, 16, 17), ((0, 1), (0, 14), (0, 15), (14, 16), (15, 17)), ()),
        # Group 2 - + shoulders
        ((2, 5), ((1, 2), (1, 5)), ()),
        # Group 3 - + upper arms; forearms copy the upper-arm ratio
        ((3, 6), ((2, 3), (5, 6)), (((3, 4), (2, 3)), ((6, 7), (5, 6)))),
        # Group 4 - + hips
        ((8, 11), ((1, 8), (1, 11)), ()),
        # Group 5 - forearms measured on their own
        ((4, 7), ((3, 4), (6, 7)), ()),
        # Group 6 - knees; lower legs copy the thigh ratio
        ((9, 12), ((8, 9), (11, 12)), (((9, 10), (8, 9)), ((12, 13), (11, 12)))),
    )
    every_stage_passed = True
    for new_joints, measured, inherited in stages:
        if not all(confident(j) for j in new_joints):
            every_stage_passed = False
            break
        for bone in measured:
            ratios[bone] = bone_ratio(*bone)
        for child, parent in inherited:
            ratios[child] = ratios[parent]

    # Full body - joints 10 and 13 are the only ones no stage has checked.
    if every_stage_passed and confident(10) and confident(13):
        for bone in bones:
            ratios[bone] = bone_ratio(*bone)

    mirrored_bones = [
        ((1, 2), (1, 5)),      # shoulders
        ((2, 3), (5, 6)),      # upper arms
        ((3, 4), (6, 7)),      # forearms
        ((8, 9), (11, 12)),    # thighs
        ((9, 10), (12, 13)),   # lower legs
    ]
    # NOTE(review): (13, 15) is not one of the bones above, so this entry is
    # always skipped and no eye averaging happens - confirm the intended pair.
    eye_pairs = [
        ((13, 15), (14, 16)),
    ]
    for first, second in mirrored_bones + eye_pairs:
        first_val = ratios.get(first)
        second_val = ratios.get(second)
        if first_val is None or second_val is None:
            continue
        ratios[first] = ratios[second] = (first_val + second_val) / 2.0
    return ratios
def align_to_pose(ref_dwpose, tpl_dwposes, anchor_idx=None, conf_th=0.9):
    """Re-proportion every template pose to the bone lengths of the reference.

    The reference is first scaled and translated onto the anchor template
    frame. Bone-length ratios between that scaled reference and the anchor
    frame are computed once and then applied to a deep copy of every
    template frame, bone by bone, in a fixed order. When both reference and
    frame have a face, the frame's face is rebuilt with the reference
    proportions and its nose tip placed on body joint 0.

    Args:
        ref_dwpose: dwpose-like dict of the reference.
        tpl_dwposes: Sequence of dwpose-like dicts (template frames); not
            modified.
        anchor_idx: Index into ``tpl_dwposes`` of the frame the ratios are
            measured against.
            NOTE(review): the default ``None`` is used directly as an
            index, which fails for a plain list - confirm callers always
            pass an index.
        conf_th: Confidence threshold forwarded to the scaling and ratio
            helpers.

    Returns:
        List of adjusted copies of the template frames.
    """
    aligned_poses = copy.deepcopy(tpl_dwposes)
    anchor_pose = tpl_dwposes[anchor_idx]
    ref_scaled, _, _ = scale_and_translate_pose(ref_dwpose, anchor_pose, conf_th=conf_th)

    ref_xy = ref_scaled['bodies']['candidate'].astype(np.float32)
    ref_conf = ref_scaled['bodies']['score'].astype(np.float32)
    anchor_xy = anchor_pose['bodies']['candidate'].astype(np.float32)
    anchor_conf = anchor_pose['bodies']['score'].astype(np.float32)

    ref_face = ref_scaled.get('faces')
    has_ref_face = ref_face is not None and ref_face.size > 0
    if has_ref_face:
        try:
            ref68, _ = _to_68x2(ref_face)
        except Exception as e:
            print("Reference face conversion failed:", e)
            has_ref_face = False

    ratios = compute_ratios_stepwise(ref_conf, anchor_conf, ref_xy, anchor_xy, conf_th=conf_th, th=1e-6)

    # Each step moves `rows` by (fixed - moving) * (1 - ratio), i.e. the bone
    # between `fixed` and `moving` is stretched to `ratio` times its length.
    # `hand` names the slot of pose['hands'] that travels with the step.
    # Order matters: later steps read joints moved by earlier ones.
    # (ratio key, fixed joint, moving joint, body rows moved, hand slot)
    bone_steps = (
        ((0, 1), 1, 0, [0, 14, 15, 16, 17], None),  # neck
        ((1, 2), 1, 2, [2, 3, 4], 1),               # shoulder right
        ((1, 5), 1, 5, [5, 6, 7], 0),               # shoulder left
        ((2, 3), 2, 3, [3, 4], 1),                  # upper arm right
        ((3, 4), 3, 4, 4, 1),                       # forearm right
        ((5, 6), 5, 6, [6, 7], 0),                  # upper arm left
        ((6, 7), 6, 7, 7, 0),                       # forearm left
        ((0, 14), 0, 14, 14, None),                 # head parts
        ((0, 15), 0, 15, 15, None),
        ((14, 16), 14, 16, 16, None),
        ((15, 17), 15, 17, 17, None),
        ((1, 8), 1, 8, 8, None),                    # hips: only the hip joint itself moves
        ((1, 11), 1, 11, 11, None),
    )
    # Legs use the mirrored form (child - parent) * (ratio - 1).
    # (bone as (parent, child), body rows moved)
    leg_steps = (
        ((8, 9), [9, 10]),
        ((9, 10), 10),
        ((11, 12), [12, 13]),
        ((12, 13), 13),
    )

    for pose in aligned_poses:
        candidate = pose['bodies']['candidate']
        hands = pose['hands']

        for key, fixed, moving, rows, hand in bone_steps:
            ratio = ratios[key]
            # Both offsets are taken before any coordinate is changed.
            dx = (candidate[fixed][0] - candidate[moving][0]) * (1. - ratio)
            dy = (candidate[fixed][1] - candidate[moving][1]) * (1. - ratio)
            candidate[rows, 0] += dx
            candidate[rows, 1] += dy
            if hand is not None:
                hands[hand, :, 0] += dx
                hands[hand, :, 1] += dy

        for (parent, child), rows in leg_steps:
            ratio = ratios[(parent, child)]
            dx = (candidate[child][0] - candidate[parent][0]) * (ratio - 1.)
            dy = (candidate[child][1] - candidate[parent][1]) * (ratio - 1.)
            candidate[rows, 0] += dx
            candidate[rows, 1] += dy

        # rough align of the face onto the adjusted joint 0
        if has_ref_face and pose.get('faces') is not None and pose['faces'].size > 0:
            try:
                src68, to_orig = _to_68x2(pose['faces'])
                scaled68 = _face_scale_only(src68, ref68, candidate[0], alpha=1.0)
                pose['faces'] = to_orig(scaled68)
            except Exception as e:
                # NOTE(review): the message says "Reference" although the
                # failure here concerns the template frame's face.
                print("Reference face conversion failed:", e)
                continue
    return aligned_poses