Upload folder using huggingface_hub

edd3cd4 verified 13 days ago

19.1 kB

	# https://github.com/ssj9596/One-to-All-Animation

	import numpy as np
	import copy
	from ..retarget_pose import get_retarget_pose

	# Row indices into a 68-point face landmark array; consumed by the face
	# alignment helpers in this module. JAW_LINE covers rows 0..16.
	L_EYE_IDXS = [36, 37, 38, 39, 40, 41]
	R_EYE_IDXS = [42, 43, 44, 45, 46, 47]
	NOSE_TIP = 30
	MOUTH_L = 48
	MOUTH_R = 54
	JAW_LINE = [*range(17)]


	# ===========================Convert wanpose format into our dwpose-like format======================
	def aaposemeta_to_dwpose(meta):
	    """Repack a pose-meta mapping into a dwpose-like dict.

	    ``meta`` must provide ``keypoints_body``, ``keypoints_right_hand``,
	    ``keypoints_left_hand`` and ``keypoints_face``. Each is indexed as a 2-D
	    array whose columns 0-1 are coordinates and column 2 is a score.

	    The last two body rows and the first face row are dropped. Hands are
	    stacked as [right, left].

	    Returns a dict with keys ``bodies`` (``candidate``, ``subset``, ``score``),
	    ``hands``, ``hands_score``, ``faces`` and ``faces_score``.
	    """
	    body = meta['keypoints_body'][:-2]
	    body_xy = body[:, :2]
	    body_conf = body[:, 2]

	    # Each entry is the keypoint's own index, or -1 when its score is <= 0.
	    body_subset = np.arange(len(body_xy), dtype=float)
	    body_subset[body_conf <= 0] = -1

	    hand_keys = ('keypoints_right_hand', 'keypoints_left_hand')
	    hand_xy = np.stack([meta[key][:, :2] for key in hand_keys])
	    hand_conf = np.stack([meta[key][:, 2] for key in hand_keys])

	    face_xy = meta['keypoints_face'][1:][:, :2]
	    face_conf = meta['keypoints_face'][1:][:, 2]

	    return {
	        "bodies": {
	            "candidate": body_xy,
	            "subset": np.expand_dims(body_subset, 0),  # shape (1, N)
	            "score": np.expand_dims(body_conf, 0),  # shape (1, N)
	        },
	        "hands": hand_xy,
	        "hands_score": hand_conf,
	        "faces": np.expand_dims(face_xy, 0),
	        "faces_score": np.expand_dims(face_conf, 0),
	    }

	def aaposemeta_obj_to_dwpose(pose_meta):
	    """Convert a pose-meta object into a dwpose-like dict with relative coordinates.

	    Reads ``width``, ``height``, ``kps_body``/``kps_body_p`` and
	    ``kps_rhand``/``kps_rhand_p``, ``kps_lhand``/``kps_lhand_p`` from
	    ``pose_meta``. Coordinates are divided by (width, height). Any attribute
	    that is ``None`` is replaced by zeros, and NaN entries become 0.

	    Hands are stacked as [right, left]. ``faces`` and ``faces_score`` are
	    always ``None`` in the result.
	    """
	    frame_size = np.array([pose_meta.width, pose_meta.height])

	    def _filled(values, fallback_shape):
	        # None -> zeros of the fallback shape; otherwise a float32 copy
	        # passed through nan_to_num with NaN mapped to 0.
	        if values is None:
	            return np.zeros(fallback_shape, dtype=np.float32)
	        return np.nan_to_num(np.array(values, dtype=np.float32), nan=0.0)

	    # body: the score array supplies the row count when coordinates are missing
	    body_px = _filled(pose_meta.kps_body, (pose_meta.kps_body_p.shape[0], 2))
	    body_xy = body_px / frame_size
	    body_conf = _filled(pose_meta.kps_body_p, (body_xy.shape[0],))
	    body_subset = np.arange(len(body_xy), dtype=float)
	    body_subset[body_conf <= 0] = -1  # -1 marks keypoints with non-positive score

	    # hands
	    right_px = _filled(pose_meta.kps_rhand, (pose_meta.kps_rhand_p.shape[0], 2))
	    left_px = _filled(pose_meta.kps_lhand, (pose_meta.kps_lhand_p.shape[0], 2))
	    hand_xy = np.stack([right_px / frame_size, left_px / frame_size])
	    hand_conf = np.stack([
	        _filled(pose_meta.kps_rhand_p, (right_px.shape[0],)),
	        _filled(pose_meta.kps_lhand_p, (left_px.shape[0],)),
	    ])

	    return {
	        "bodies": {
	            "candidate": body_xy,
	            "subset": np.expand_dims(body_subset, 0),
	            "score": np.expand_dims(body_conf, 0),
	        },
	        "hands": hand_xy,
	        "hands_score": hand_conf,
	        "faces": None,
	        "faces_score": None,
	    }

	# ===============================Face Rough alignment======================

	def _to_68x2(arr):
	if arr.shape == (1, 68, 2):
	def to_orig(x):
	x = np.asarray(x, dtype=np.float64)
	if x.shape != (68, 2):
	raise ValueError("to_orig expects (68,2)")
	return x[np.newaxis, :, :]
	return arr[0].astype(np.float64), to_orig
	if arr.shape == (68, 2):
	def to_orig(x):
	x = np.asarray(x, dtype=np.float64)
	if x.shape != (68, 2):
	raise ValueError("to_orig expects (68,2)")
	return x
	return arr.astype(np.float64), to_orig
	if arr.shape == (2, 68):
	def to_orig(x):
	x = np.asarray(x, dtype=np.float64)
	if x.shape != (68, 2):
	raise ValueError("to_orig expects (68,2)")
	return x.T
	return arr.T.astype(np.float64), to_orig
	raise ValueError(f"faces shape {arr.shape} not supported; expected (1,68,2) or (68,2) or (2,68)")

	def _eye_center(face68, idxs):
	return face68[idxs].mean(axis=0)

	def _anchors(face68):
	    """Return five anchor points as a (5, 2) stack.

	    Order: the two eye centres (the one with the smaller x first), then the
	    rows at NOSE_TIP, MOUTH_L and MOUTH_R.
	    """
	    eye_a = _eye_center(face68, L_EYE_IDXS)
	    eye_b = _eye_center(face68, R_EYE_IDXS)
	    # Swap only on a strict '<', so ties keep the original order.
	    first_eye, second_eye = (eye_b, eye_a) if eye_b[0] < eye_a[0] else (eye_a, eye_b)
	    return np.stack(
	        [first_eye, second_eye, face68[NOSE_TIP], face68[MOUTH_L], face68[MOUTH_R]],
	        axis=0,
	    )

	def _face_scale_only(src68, ref68, target_nose_pos, alpha=1.0, anchor_pairs=((36, 45), (27, 8))):
	    """Roughly align a source face to the proportions of a reference face.

	    The source landmarks are scaled per axis about the mean of their anchor
	    points (see ``_anchors``), then translated so that the NOSE_TIP landmark
	    lands on ``target_nose_pos``.

	    Args:
	        src68: source landmarks, convertible to a (68, 2) float array.
	        ref68: reference landmarks, same layout.
	        target_nose_pos: (x, y) position the scaled nose tip is moved to.
	        alpha: blend between no scaling (0.0) and the full reference
	            proportion (1.0); the applied factor is
	            ``(1 - alpha) + scale * alpha``.
	        anchor_pairs: two landmark index pairs. The distance between the
	            first pair drives the x scale, the second pair the y scale
	            (defaults: [36, 45] for x, [27, 8] for y).

	    Returns:
	        A new (68, 2) float64 array; the inputs are not modified.
	    """
	    src = np.asarray(src68, dtype=np.float64)
	    ref = np.asarray(ref68, dtype=np.float64)

	    center = _anchors(src).mean(axis=0)

	    def _span(points, pair):
	        # Euclidean distance between the two landmarks of an anchor pair.
	        return np.linalg.norm(points[pair[0]] - points[pair[1]])

	    src_w, ref_w = _span(src, anchor_pairs[0]), _span(ref, anchor_pairs[0])
	    src_h, ref_h = _span(src, anchor_pairs[1]), _span(ref, anchor_pairs[1])

	    # A degenerate source span would divide by ~0; leave that axis unscaled.
	    scale_x = ref_w / src_w if src_w > 1e-6 else 1.0
	    scale_y = ref_h / src_h if src_h > 1e-6 else 1.0

	    # Scale in centre-relative coordinates. The in-place multiply and the
	    # `scale * alpha` product had lost their `*` operators in the original.
	    scaled_local = src - center
	    scaled_local[:, 0] *= (1 - alpha) + scale_x * alpha
	    scaled_local[:, 1] *= (1 - alpha) + scale_y * alpha
	    scaled_global = scaled_local + center

	    # Translate so the nose tip coincides with the requested position.
	    offset = target_nose_pos - scaled_global[NOSE_TIP]
	    scaled_global += offset

	    return scaled_global

	# ===============================Reference Img Pre-Process======================


	def scale_and_translate_pose(tgt_pose, ref_pose, conf_th=0.9, return_ratio=False):
	    """Uniformly scale and shift ``tgt_pose`` so its body matches ``ref_pose``.

	    The scale ratio is chosen from the most reliable measurement available:
	      * shoulders (2, 5) and hips (8, 11) all confident in both poses:
	        mean of the shoulder-width (x) ratio and torso-height (y) ratio;
	      * only shoulders confident: ratio of shoulder distances;
	      * otherwise: ratio of the distances between keypoints 16 and 17.
	    A target length at or below 1e-6 yields a ratio of 1.0.

	    Body, ``faces`` and ``hands`` of a deep copy of ``tgt_pose`` are scaled
	    about body keypoint 1 and then shifted so that keypoint 1 coincides with
	    the reference's keypoint 1. ``tgt_pose`` itself is left untouched.

	    Returns ``scale_ratio`` alone when ``return_ratio`` is true, otherwise
	    the tuple ``(aligned_pose, shoulder_ok, hip_ok)``.
	    """
	    aligned_pose = copy.deepcopy(tgt_pose)
	    eps = 1e-6
	    ref_xy = ref_pose['bodies']['candidate'].astype(np.float32)
	    tgt_xy = aligned_pose['bodies']['candidate'].astype(np.float32)

	    # Scores default to all-ones when a pose carries none.
	    ref_conf = ref_pose['bodies'].get('score', np.ones(ref_xy.shape[0])).astype(np.float32).reshape(-1)
	    tgt_conf = tgt_pose['bodies'].get('score', np.ones(tgt_xy.shape[0])).astype(np.float32).reshape(-1)

	    def _pair_confident(first, second):
	        # Both joints must pass conf_th in the reference AND in the target.
	        ref_valid = (ref_conf[first] >= conf_th) and (ref_conf[second] >= conf_th)
	        tgt_valid = (tgt_conf[first] >= conf_th) and (tgt_conf[second] >= conf_th)
	        return ref_valid and tgt_valid

	    def _ratio(ref_len, tgt_len):
	        return ref_len / tgt_len if tgt_len > eps else 1.0

	    shoulder_ok = _pair_confident(2, 5)
	    hip_ok = _pair_confident(8, 11)

	    if shoulder_ok and hip_ok:
	        x_ratio = _ratio(
	            abs(ref_xy[5, 0] - ref_xy[2, 0]),
	            abs(tgt_xy[5, 0] - tgt_xy[2, 0]),
	        )
	        y_ratio = _ratio(
	            abs(np.mean(ref_xy[[8, 11], 1]) - np.mean(ref_xy[[2, 5], 1])),
	            abs(np.mean(tgt_xy[[8, 11], 1]) - np.mean(tgt_xy[[2, 5], 1])),
	        )
	        scale_ratio = (x_ratio + y_ratio) / 2
	    elif shoulder_ok:
	        scale_ratio = _ratio(
	            np.linalg.norm(ref_xy[2] - ref_xy[5]),
	            np.linalg.norm(tgt_xy[2] - tgt_xy[5]),
	        )
	    else:
	        scale_ratio = _ratio(
	            np.linalg.norm(ref_xy[16] - ref_xy[17]),
	            np.linalg.norm(tgt_xy[16] - tgt_xy[17]),
	        )

	    if return_ratio:
	        return scale_ratio

	    pivot_idx = 1
	    pivot = tgt_xy[pivot_idx].copy()
	    point_sets = (tgt_xy, aligned_pose.get('faces'), aligned_pose.get('hands'))

	    # scale: in place, about the pre-scale pivot
	    for pts in point_sets:
	        if pts is None or not pts.size > 0:
	            continue
	        pts[..., 0] = pivot[0] + (pts[..., 0] - pivot[0]) * scale_ratio
	        pts[..., 1] = pivot[1] + (pts[..., 1] - pivot[1]) * scale_ratio

	    # offset: measured after scaling, then applied to every point set
	    shift = ref_xy[pivot_idx] - tgt_xy[pivot_idx]
	    for pts in point_sets:
	        if pts is None or not pts.size > 0:
	            continue
	        pts += shift

	    aligned_pose['bodies']['candidate'] = tgt_xy
	    return aligned_pose, shoulder_ok, hip_ok

	# ===============================Align to Ref Driven Pose Retarget ======================

	def align_to_reference(ref_pose_meta, tpl_pose_metas, tpl_dwposes, anchor_idx=None):
	    """Retarget template poses onto the reference and roughly align the faces.

	    Steps:
	      1. Convert ``ref_pose_meta`` with ``aaposemeta_to_dwpose``.
	      2. Run ``get_retarget_pose`` using ``tpl_pose_metas[anchor_idx]`` as the
	         anchor frame, and convert every returned meta with
	         ``aaposemeta_obj_to_dwpose``.
	      3. For each frame whose template dwpose has a face, scale that face to
	         the reference proportions, pin its nose tip to the retargeted body
	         keypoint 0, and copy over the template's face scores.

	    ``anchor_idx`` is used directly as the index into ``tpl_pose_metas``, so
	    callers are expected to pass a real index.

	    Returns the list of retargeted dwpose dicts.
	    """
	    # pose retarget
	    ref_dw = aaposemeta_to_dwpose(ref_pose_meta)
	    anchor_meta = tpl_pose_metas[anchor_idx]
	    # NOTE(review): the meaning of the two trailing None arguments is defined
	    # by get_retarget_pose, which is not visible here.
	    retargeted_metas = get_retarget_pose(
	        anchor_meta,
	        ref_pose_meta,
	        tpl_pose_metas,
	        None, None
	    )
	    retarget_dwposes = [aaposemeta_obj_to_dwpose(meta) for meta in retargeted_metas]

	    # face rough align
	    ref_faces = ref_dw['faces']
	    if ref_faces is None:
	        return retarget_dwposes

	    ref68, _ = _to_68x2(ref_faces)
	    for tpl_dw, rt_dw in zip(tpl_dwposes, retarget_dwposes):
	        tpl_faces = tpl_dw['faces']
	        if tpl_faces is None:
	            continue
	        src68, to_orig = _to_68x2(tpl_faces)
	        nose_target = rt_dw['bodies']['candidate'][0]
	        aligned68 = _face_scale_only(src68, ref68, nose_target, alpha=1.0)
	        rt_dw['faces'] = to_orig(aligned68)
	        rt_dw['faces_score'] = tpl_dw['faces_score']

	    return retarget_dwposes

	# ===============================Rescale-Ref && Change part of pose (Optional)======================


	def compute_ratios_stepwise(ref_scores, source_scores, ref_pts, src_pts, conf_th=0.9, th=1e-6):
	    """Compute per-bone length ratios (reference / source), stage by stage.

	    Every bone starts at ratio 1.0. Bones are measured in stages (head,
	    shoulders, upper arms, hips, forearms, knees, full body). A stage is
	    applied only if all keypoints it needs, including those of every earlier
	    stage, have a score >= ``conf_th`` in BOTH ``ref_scores[0]`` and
	    ``source_scores[0]``. Forearms and lower legs borrow their parent bone's
	    ratio until their own end points are trusted. Finally the ratios of
	    mirrored bones are averaged.

	    A source bone no longer than ``th`` gives a ratio of 1.0.

	    Returns a dict mapping ``(i, j)`` keypoint pairs to ratios.
	    """
	    def _trusted(idx):
	        return ref_scores[0, idx] >= conf_th and source_scores[0, idx] >= conf_th

	    def _length_ratio(a, b):
	        ref_len = np.linalg.norm(ref_pts[a] - ref_pts[b])
	        src_len = np.linalg.norm(src_pts[a] - src_pts[b])
	        return ref_len / src_len if src_len > th else 1.0

	    bone_pairs = [
	        (0, 1), (1, 2), (1, 5), (2, 3), (3, 4), (5, 6), (6, 7),
	        (0, 14), (0, 15), (14, 16), (15, 17),
	        (8, 9), (9, 10), (11, 12), (12, 13),
	        (1, 8), (1, 11),
	    ]
	    ratios = dict.fromkeys(bone_pairs, 1.0)

	    # Each stage: (keypoints newly required, bones measured directly,
	    # child bone -> parent bone whose ratio is copied).
	    # Requirements are cumulative, so the first failing stage ends the walk.
	    stages = [
	        # head only
	        ([0, 1, 14, 15, 16, 17], [(0, 1), (0, 14), (0, 15), (14, 16), (15, 17)], {}),
	        # + shoulders
	        ([2, 5], [(1, 2), (1, 5)], {}),
	        # + upper arms (forearms inherit)
	        ([3, 6], [(2, 3), (5, 6)], {(3, 4): (2, 3), (6, 7): (5, 6)}),
	        # + hips
	        ([8, 11], [(1, 8), (1, 11)], {}),
	        # forearms measured on their own
	        ([4, 7], [(3, 4), (6, 7)], {}),
	        # knees (lower legs inherit)
	        ([9, 12], [(8, 9), (11, 12)], {(9, 10): (8, 9), (12, 13): (11, 12)}),
	        # full body: every bone measured directly
	        ([10, 13], bone_pairs, {}),
	    ]
	    for new_points, measured, inherited in stages:
	        if not all(_trusted(i) for i in new_points):
	            break
	        for bone in measured:
	            ratios[bone] = _length_ratio(*bone)
	        for child, parent in inherited.items():
	            ratios[child] = ratios[parent]

	    mirrored_bones = [
	        ((1, 2), (1, 5)),      # shoulders
	        ((2, 3), (5, 6)),      # upper arms
	        ((3, 4), (6, 7)),      # forearms
	        ((8, 9), (11, 12)),    # thighs
	        ((9, 10), (12, 13)),   # lower legs
	        # NOTE(review): (13, 15) is never a key of `ratios`, so this "eye"
	        # pair is skipped below and has no effect; confirm the intended keys.
	        ((13, 15), (14, 16)),
	    ]
	    for one_side, other_side in mirrored_bones:
	        first = ratios.get(one_side)
	        second = ratios.get(other_side)
	        if first is None or second is None:
	            continue
	        mean_ratio = (first + second) / 2.0
	        ratios[one_side] = mean_ratio
	        ratios[other_side] = mean_ratio

	    return ratios

	def align_to_pose(ref_dwpose, tpl_dwposes, anchor_idx=None, conf_th=0.9):
	    """Re-proportion every template pose to the bone lengths of a reference.

	    The reference pose is first scaled and translated onto the anchor frame
	    ``tpl_dwposes[anchor_idx]``. Per-bone ratios between that scaled
	    reference and the anchor frame are then applied to a deep copy of every
	    template pose, walking the skeleton from the neck outwards so that each
	    bone's correction is carried by everything hanging off it (hand rows
	    follow their arm chain). When the reference has a usable face, each
	    frame's face is roughly aligned and pinned to the adjusted keypoint 0.

	    ``anchor_idx`` is used directly as an index into ``tpl_dwposes``, so
	    callers are expected to pass a real index.

	    Returns the list of adjusted poses; ``tpl_dwposes`` is not modified.
	    """
	    # (ratio key, fixed joint, pulled joint, body rows that move, hand row that moves)
	    # Offsets are (fixed - pulled) * (1 - ratio). Order matters: each entry
	    # reads positions already updated by the entries before it.
	    upper_chain = (
	        ((0, 1), 1, 0, [0, 14, 15, 16, 17], None),  # neck
	        ((1, 2), 1, 2, [2, 3, 4], 1),               # shoulder right
	        ((1, 5), 1, 5, [5, 6, 7], 0),               # shoulder left
	        ((2, 3), 2, 3, [3, 4], 1),                  # upper arm right
	        ((3, 4), 3, 4, 4, 1),                       # forearm right
	        ((5, 6), 5, 6, [6, 7], 0),                  # upper arm left
	        ((6, 7), 6, 7, 7, 0),                       # forearm left
	        ((0, 14), 0, 14, 14, None),                 # head parts
	        ((0, 15), 0, 15, 15, None),
	        ((14, 16), 14, 16, 16, None),
	        ((15, 17), 15, 17, 17, None),
	        ((1, 8), 1, 8, 8, None),                    # hips
	        ((1, 11), 1, 11, 11, None),
	    )
	    # (bone, body rows that move); offsets are (lower - upper) * (ratio - 1).
	    leg_chain = (
	        ((8, 9), [9, 10]),
	        ((9, 10), 10),
	        ((11, 12), [12, 13]),
	        ((12, 13), 13),
	    )

	    detected_poses = copy.deepcopy(tpl_dwposes)

	    best_pose = tpl_dwposes[anchor_idx]
	    ref_pose_scaled, _, _ = scale_and_translate_pose(ref_dwpose, best_pose, conf_th=conf_th)

	    ref_candidate = ref_pose_scaled['bodies']['candidate'].astype(np.float32)
	    ref_scores = ref_pose_scaled['bodies']['score'].astype(np.float32)
	    source_candidate = best_pose['bodies']['candidate'].astype(np.float32)
	    source_scores = best_pose['bodies']['score'].astype(np.float32)

	    has_ref_face = (
	        'faces' in ref_pose_scaled
	        and ref_pose_scaled['faces'] is not None
	        and ref_pose_scaled['faces'].size > 0
	    )
	    if has_ref_face:
	        try:
	            ref68, _ = _to_68x2(ref_pose_scaled['faces'])
	        except Exception as e:
	            # Best effort: without a usable reference face, skip face alignment.
	            print("Reference face conversion failed:", e)
	            has_ref_face = False

	    ratios = compute_ratios_stepwise(
	        ref_scores, source_scores, ref_candidate, source_candidate,
	        conf_th=conf_th, th=1e-6,
	    )

	    for pose in detected_poses:
	        candidate = pose['bodies']['candidate']
	        hands = pose['hands']

	        # ===== Neck, shoulders, arms, head parts, hips =====
	        for key, fixed, pulled, moved_rows, hand_row in upper_chain:
	            gain = 1. - ratios[key]
	            dx = (candidate[fixed][0] - candidate[pulled][0]) * gain
	            dy = (candidate[fixed][1] - candidate[pulled][1]) * gain
	            candidate[moved_rows, 0] += dx
	            candidate[moved_rows, 1] += dy
	            if hand_row is not None:
	                hands[hand_row, :, 0] += dx
	                hands[hand_row, :, 1] += dy

	        # ===== Legs =====
	        for (upper, lower), moved_rows in leg_chain:
	            gain = ratios[(upper, lower)] - 1.
	            dx = (candidate[lower][0] - candidate[upper][0]) * gain
	            dy = (candidate[lower][1] - candidate[upper][1]) * gain
	            candidate[moved_rows, 0] += dx
	            candidate[moved_rows, 1] += dy

	        # rough align
	        if not has_ref_face or 'faces' not in pose:
	            continue
	        if pose['faces'] is None or not pose['faces'].size > 0:
	            continue
	        try:
	            src68, to_orig = _to_68x2(pose['faces'])
	            scaled68 = _face_scale_only(src68, ref68, candidate[0], alpha=1.0)
	            pose['faces'] = to_orig(scaled68)
	        except Exception as e:
	            # Best effort per frame: keep the frame's original face on failure.
	            print("Reference face conversion failed:", e)

	    return detected_poses