File size: 19,093 Bytes
edd3cd4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
# https://github.com/ssj9596/One-to-All-Animation

import numpy as np
import copy
from ..retarget_pose import get_retarget_pose

# Row indices into a 68-point face landmark array.
# NOTE(review): the numbering looks like the common 68-landmark layout
# (eyes 36-47, nose tip 30, mouth corners 48/54, jaw 0-16) — confirm against
# the detector that produces the faces.
L_EYE_IDXS = [36, 37, 38, 39, 40, 41]
R_EYE_IDXS = [42, 43, 44, 45, 46, 47]
NOSE_TIP = 30
MOUTH_L, MOUTH_R = 48, 54
JAW_LINE = [*range(17)]


# ===========================Convert wanpose format into our dwpose-like format======================
def aaposemeta_to_dwpose(meta):
    """
    Repack a keypoint dict into a dwpose-like dict.

    `meta` must provide 'keypoints_body', 'keypoints_right_hand',
    'keypoints_left_hand' and 'keypoints_face'; each is indexed as a 2-D
    array whose columns 0-1 are used as coordinates and column 2 as score.

    The last two body rows and the first face row are dropped.
    Hands are stacked right first, then left.

    Returns a dict with keys 'bodies' (with 'candidate', 'subset', 'score'),
    'hands', 'hands_score', 'faces' and 'faces_score'.
    """
    body = meta['keypoints_body'][:-2]
    body_xy = body[:, :2]
    body_conf = body[:, 2]

    # subset[i] is the keypoint's own index, or -1 when its score is <= 0.
    subset = np.arange(len(body_xy), dtype=float)
    subset[body_conf <= 0] = -1

    right_hand = meta['keypoints_right_hand']
    left_hand = meta['keypoints_left_hand']
    face = meta['keypoints_face'][1:]

    return {
        "bodies": {
            "candidate": body_xy,
            "subset": subset[np.newaxis],      # shape (1, N)
            "score": body_conf[np.newaxis],    # shape (1, N)
        },
        "hands": np.stack([right_hand[:, :2], left_hand[:, :2]]),
        "hands_score": np.stack([right_hand[:, 2], left_hand[:, 2]]),
        "faces": face[:, :2][np.newaxis],
        "faces_score": face[:, 2][np.newaxis],
    }

def aaposemeta_obj_to_dwpose(pose_meta):
    """
    Convert an AAPoseMeta-like object into a dwpose-like dict.

    Coordinates are made relative by dividing by (width, height). NaN
    entries become 0. A missing (None) coordinate or score array is
    replaced by zeros whose length is taken from its non-None counterpart.

    Reads `width`, `height`, `kps_body`, `kps_body_p`, `kps_rhand`,
    `kps_rhand_p`, `kps_lhand` and `kps_lhand_p` from `pose_meta`.

    Returns a dict with keys 'bodies', 'hands', 'hands_score', 'faces' and
    'faces_score'. Hands are stacked right first, then left. 'faces' and
    'faces_score' are always None here.

    Raises:
        ValueError: if both the coordinates and the scores of a part are
            None, so the keypoint count cannot be inferred.
    """
    frame_size = np.array([pose_meta.width, pose_meta.height])

    def _clean(values):
        # float32 copy with NaNs replaced by 0
        return np.nan_to_num(np.array(values, dtype=np.float32), nan=0.0)

    def _relative_part(kps, conf, label):
        """Return (coords / frame_size, scores) for one body part."""
        if kps is None and conf is None:
            raise ValueError(
                f"{label}: keypoints and scores are both None; "
                "cannot infer the number of keypoints"
            )
        xy = None if kps is None else _clean(kps)
        score = None if conf is None else _clean(conf)
        # Fill whichever side is missing, sized from the side that exists.
        # The fallback size is computed lazily so a None score array no
        # longer crashes when the coordinates are available.
        if xy is None:
            xy = np.zeros((score.shape[0], 2), dtype=np.float32)
        if score is None:
            score = np.zeros((xy.shape[0],), dtype=np.float32)
        return xy / frame_size, score

    # body
    candidate_body, score_body = _relative_part(
        pose_meta.kps_body, pose_meta.kps_body_p, "body"
    )
    # subset[i] is the keypoint's own index, or -1 when its score is <= 0.
    subset_body = np.arange(len(candidate_body), dtype=float)
    subset_body[score_body <= 0] = -1
    bodies = {
        "candidate": candidate_body,
        "subset": np.expand_dims(subset_body, axis=0),
        "score": np.expand_dims(score_body, axis=0),
    }

    # hands
    rhand_xy, rhand_score = _relative_part(
        pose_meta.kps_rhand, pose_meta.kps_rhand_p, "right hand"
    )
    lhand_xy, lhand_score = _relative_part(
        pose_meta.kps_lhand, pose_meta.kps_lhand_p, "left hand"
    )

    return {
        "bodies": bodies,
        "hands": np.stack([rhand_xy, lhand_xy]),
        "hands_score": np.stack([rhand_score, lhand_score]),
        "faces": None,
        "faces_score": None,
    }

# ===============================Face Rough alignment======================

def _to_68x2(arr):
    """
    Normalise a face landmark array to float64 shape (68, 2).

    Accepts shapes (1, 68, 2), (68, 2) and (2, 68). Returns a pair
    `(face68, to_orig)`, where `to_orig` converts a (68, 2) array back to
    the layout `arr` came in with.

    Raises ValueError for any other input shape; `to_orig` raises
    ValueError when given something that is not (68, 2).
    """
    def _checked(points):
        # Shared validation used by all three inverse converters.
        points = np.asarray(points, dtype=np.float64)
        if points.shape != (68, 2):
            raise ValueError("to_orig expects (68,2)")
        return points

    def _restore_batched(points):
        return _checked(points)[np.newaxis, :, :]

    def _restore_transposed(points):
        return _checked(points).T

    layout = arr.shape
    if layout == (1, 68, 2):
        return arr[0].astype(np.float64), _restore_batched
    if layout == (68, 2):
        return arr.astype(np.float64), _checked
    if layout == (2, 68):
        return arr.T.astype(np.float64), _restore_transposed
    raise ValueError(f"faces shape {arr.shape} not supported; expected (1,68,2) or (68,2) or (2,68)")

def _eye_center(face68, idxs):
    """Return the mean of the rows of `face68` selected by `idxs`."""
    selected = face68[idxs]
    return np.mean(selected, axis=0)

def _anchors(face68):
    """
    Return five anchor points of a face as a (5, 2) stack.

    Order: the two eye centres sorted so the one with the smaller x comes
    first, then the nose tip, then the MOUTH_L and MOUTH_R landmarks.
    """
    eye_a = _eye_center(face68, L_EYE_IDXS)
    eye_b = _eye_center(face68, R_EYE_IDXS)
    # Keep the eye with the smaller x coordinate first.
    if eye_b[0] < eye_a[0]:
        first_eye, second_eye = eye_b, eye_a
    else:
        first_eye, second_eye = eye_a, eye_b
    points = [first_eye, second_eye, face68[NOSE_TIP], face68[MOUTH_L], face68[MOUTH_R]]
    return np.stack(points, axis=0)

def _face_scale_only(src68, ref68, target_nose_pos, alpha=1.0, anchor_pairs=((36, 45), (27, 8))):
    """
    Rough alignment: rescale the source face to the proportions of the
    reference face, then move it so the nose tip lands on `target_nose_pos`.

    Args:
        src68: source landmarks, indexable as a (68, 2) array.
        ref68: reference landmarks, same layout.
        target_nose_pos: position the NOSE_TIP landmark is moved to.
        alpha: blend factor; 0 keeps the source size, 1 applies the full
            reference/source scale.
        anchor_pairs: two landmark index pairs. The Euclidean distance
            between the first pair drives the x scale, the distance between
            the second pair drives the y scale. The default is an immutable
            tuple so it cannot be altered between calls.

    Returns:
        A new float64 array of the scaled and shifted landmarks; the inputs
        are not modified.
    """
    src = np.asarray(src68, dtype=np.float64)
    ref = np.asarray(ref68, dtype=np.float64)

    def _span(points, pair):
        # Distance between the two landmarks named by `pair`.
        return np.linalg.norm(points[pair[0]] - points[pair[1]])

    # Scaling is done about the mean of the five anchor points.
    center = _anchors(src).mean(axis=0)

    src_w, ref_w = _span(src, anchor_pairs[0]), _span(ref, anchor_pairs[0])
    src_h, ref_h = _span(src, anchor_pairs[1]), _span(ref, anchor_pairs[1])

    # A degenerate source span leaves that axis unscaled.
    scale_x = ref_w / src_w if src_w > 1e-6 else 1.0
    scale_y = ref_h / src_h if src_h > 1e-6 else 1.0

    scaled = src - center
    scaled[:, 0] *= (1 - alpha) + scale_x * alpha
    scaled[:, 1] *= (1 - alpha) + scale_y * alpha
    scaled = scaled + center

    # Translate so the nose tip sits exactly on the requested position.
    scaled += target_nose_pos - scaled[NOSE_TIP]
    return scaled

# ===============================Reference Img Pre-Process======================


def scale_and_translate_pose(tgt_pose, ref_pose, conf_th=0.9, return_ratio=False):
    """
    Uniformly scale and shift a copy of `tgt_pose` to match `ref_pose`.

    The scale ratio (reference / target) comes from the first usable source:
      1. shoulders (2, 5) and hips (8, 11) confident in both poses: the mean
         of the shoulder-width ratio and the torso-height ratio;
      2. only shoulders confident: ratio of shoulder distances;
      3. otherwise: ratio of the distance between keypoints 16 and 17.
    A target length of at most 1e-6 yields a ratio of 1.0.

    Body, 'faces' and 'hands' are scaled about body keypoint 1 and then
    translated so that keypoint 1 coincides with the reference's.

    Returns:
        `scale_ratio` alone when `return_ratio` is true, otherwise the tuple
        `(aligned_pose, shoulder_ok, hip_ok)`. `tgt_pose` is not modified.
    """
    eps = 1e-6
    pivot_idx = 1
    aligned_pose = copy.deepcopy(tgt_pose)

    def _flat_scores(pose, count):
        # Missing scores are treated as fully confident.
        return pose['bodies'].get('score', np.ones(count)).astype(np.float32).reshape(-1)

    def _both_confident(scores, first, second):
        return (scores[first] >= conf_th) and (scores[second] >= conf_th)

    def _ratio(ref_len, tgt_len):
        return ref_len / tgt_len if tgt_len > eps else 1.0

    ref_kpt = ref_pose['bodies']['candidate'].astype(np.float32)
    tgt_kpt = aligned_pose['bodies']['candidate'].astype(np.float32)
    ref_sc = _flat_scores(ref_pose, ref_kpt.shape[0])
    tgt_sc = _flat_scores(tgt_pose, tgt_kpt.shape[0])

    ref_shoulders = _both_confident(ref_sc, 2, 5)
    tgt_shoulders = _both_confident(tgt_sc, 2, 5)
    shoulder_ok = ref_shoulders and tgt_shoulders

    ref_hips = _both_confident(ref_sc, 8, 11)
    tgt_hips = _both_confident(tgt_sc, 8, 11)
    hip_ok = ref_hips and tgt_hips

    if shoulder_ok and hip_ok:
        x_ratio = _ratio(
            abs(ref_kpt[5, 0] - ref_kpt[2, 0]),
            abs(tgt_kpt[5, 0] - tgt_kpt[2, 0]),
        )
        y_ratio = _ratio(
            abs(np.mean(ref_kpt[[8, 11], 1]) - np.mean(ref_kpt[[2, 5], 1])),
            abs(np.mean(tgt_kpt[[8, 11], 1]) - np.mean(tgt_kpt[[2, 5], 1])),
        )
        scale_ratio = (x_ratio + y_ratio) / 2
    elif shoulder_ok:
        scale_ratio = _ratio(
            np.linalg.norm(ref_kpt[2] - ref_kpt[5]),
            np.linalg.norm(tgt_kpt[2] - tgt_kpt[5]),
        )
    else:
        # NOTE(review): this fallback does not check the confidence of
        # keypoints 16/17 — confirm that is intended.
        scale_ratio = _ratio(
            np.linalg.norm(ref_kpt[16] - ref_kpt[17]),
            np.linalg.norm(tgt_kpt[16] - tgt_kpt[17]),
        )

    if return_ratio:
        return scale_ratio

    # Everything that moves with the body; absent or empty arrays are skipped.
    movable = [
        arr
        for arr in (tgt_kpt, aligned_pose.get('faces'), aligned_pose.get('hands'))
        if arr is not None and arr.size > 0
    ]

    # scale about the pivot keypoint (in place)
    pivot = tgt_kpt[pivot_idx].copy()
    for arr in movable:
        for axis in (0, 1):
            arr[..., axis] = pivot[axis] + (arr[..., axis] - pivot[axis]) * scale_ratio

    # offset so the pivot lands on the reference's pivot (in place)
    shift = ref_kpt[pivot_idx] - tgt_kpt[pivot_idx]
    for arr in movable:
        arr += shift

    aligned_pose['bodies']['candidate'] = tgt_kpt
    return aligned_pose, shoulder_ok, hip_ok

# ===============================Align to Ref Driven Pose Retarget ======================

def align_to_reference(ref_pose_meta, tpl_pose_metas, tpl_dwposes, anchor_idx=None):
    """
    Retarget the template poses onto the reference, then roughly align faces.

    `get_retarget_pose` is called with the template frame at `anchor_idx`,
    the reference meta and all template metas; each result is converted to
    a dwpose-like dict. For every frame whose template dwpose has a face,
    that face is rescaled to the reference face's proportions, with its
    nose tip placed on body keypoint 0 of the retargeted pose. The
    template's 'faces_score' is copied over.

    NOTE(review): `anchor_idx` is used directly as an index, so the default
    `None` fails for a list of metas — callers presumably always pass an
    integer; confirm.

    Returns the list of retargeted dwpose-like dicts.
    """
    reference_dw = aaposemeta_to_dwpose(ref_pose_meta)
    anchor_meta = tpl_pose_metas[anchor_idx]

    retargeted_metas = get_retarget_pose(
        anchor_meta,
        ref_pose_meta,
        tpl_pose_metas,
        None, None
    )
    retarget_dwposes = [aaposemeta_obj_to_dwpose(meta) for meta in retargeted_metas]

    reference_face = reference_dw['faces']
    if reference_face is None:
        return retarget_dwposes

    ref68, _ = _to_68x2(reference_face)
    for template_dw, retargeted_dw in zip(tpl_dwposes, retarget_dwposes):
        template_face = template_dw['faces']
        if template_face is None:
            continue
        src68, to_orig = _to_68x2(template_face)
        # The retargeted body's keypoint 0 is where the nose tip should go.
        nose_target = retargeted_dw['bodies']['candidate'][0]
        aligned68 = _face_scale_only(src68, ref68, nose_target, alpha=1.0)
        retargeted_dw['faces'] = to_orig(aligned68)
        retargeted_dw['faces_score'] = template_dw['faces_score']

    return retarget_dwposes

# ===============================Rescale-Ref && Change part of pose(Option)======================


def compute_ratios_stepwise(ref_scores, source_scores, ref_pts, src_pts, conf_th=0.9, th=1e-6):
    """
    Compute per-bone length ratios (reference / source) in confidence stages.

    Every bone starts at ratio 1.0. Stages are cumulative: a stage applies
    only when its joints and those of all earlier stages reach `conf_th`
    in row 0 of both score arrays. Measured ratios fall back to 1.0 when
    the source bone is no longer than `th`.

    Stages, in order: head; shoulders; upper arms (forearms inherit the
    upper-arm ratio); hips; forearms measured directly; thighs (lower legs
    inherit the thigh ratio); full body (every bone measured directly).

    Finally the ratios of mirrored left/right bones are averaged.

    Returns a dict mapping bone `(joint_a, joint_b)` to its ratio.
    """
    def _confident(joint):
        return ref_scores[0, joint] >= conf_th and source_scores[0, joint] >= conf_th

    def _measure(bone):
        joint_a, joint_b = bone
        ref_len = np.linalg.norm(ref_pts[joint_a] - ref_pts[joint_b])
        src_len = np.linalg.norm(src_pts[joint_a] - src_pts[joint_b])
        return ref_len / src_len if src_len > th else 1.0

    ratio_pairs = [
        (0, 1), (1, 2), (1, 5), (2, 3), (3, 4), (5, 6), (6, 7),
        (0, 14), (0, 15), (14, 16), (15, 17),
        (8, 9), (9, 10), (11, 12), (12, 13),
        (1, 8), (1, 11),
    ]
    ratios = dict.fromkeys(ratio_pairs, 1.0)

    # (newly required joints, bones measured, {child bone: parent bone to copy})
    # Each stage's requirement is a superset of the previous one, so the
    # first failing stage ends the whole sequence.
    stages = [
        # head only
        ((0, 1, 14, 15, 16, 17), [(0, 1), (0, 14), (0, 15), (14, 16), (15, 17)], {}),
        # + shoulders
        ((2, 5), [(1, 2), (1, 5)], {}),
        # + upper arms; forearms borrow the upper-arm ratio for now
        ((3, 6), [(2, 3), (5, 6)], {(3, 4): (2, 3), (6, 7): (5, 6)}),
        # + hips
        ((8, 11), [(1, 8), (1, 11)], {}),
        # + wrists: forearms get their own ratio
        ((4, 7), [(3, 4), (6, 7)], {}),
        # + knees; lower legs borrow the thigh ratio
        ((9, 12), [(8, 9), (11, 12)], {(9, 10): (8, 9), (12, 13): (11, 12)}),
        # full body: every bone measured directly
        ((10, 13), ratio_pairs, {}),
    ]
    for new_joints, measured, inherited in stages:
        if not all(_confident(joint) for joint in new_joints):
            break
        for bone in measured:
            ratios[bone] = _measure(bone)
        for child, parent in inherited.items():
            ratios[child] = ratios[parent]

    mirrored = [
        ((1, 2), (1, 5)),      # shoulders
        ((2, 3), (5, 6)),      # upper arms
        ((3, 4), (6, 7)),      # forearms
        ((8, 9), (11, 12)),    # thighs
        ((9, 10), (12, 13)),   # lower legs
        # NOTE(review): (13, 15) is never a key of `ratios`, so this "eye"
        # pair is always skipped — confirm which bones were intended.
        ((13, 15), (14, 16)),
    ]
    for side_a, side_b in mirrored:
        value_a = ratios.get(side_a)
        value_b = ratios.get(side_b)
        if value_a is None or value_b is None:
            continue
        mean_value = (value_a + value_b) / 2.0
        ratios[side_a] = mean_value
        ratios[side_b] = mean_value

    return ratios

def align_to_pose(ref_dwpose, tpl_dwposes,anchor_idx=None,conf_th=0.9,):
    """
    Rescale the bones of every template pose to the reference's proportions.

    Steps:
      1. The reference pose is scaled and translated onto the template frame
         at `anchor_idx` (scale_and_translate_pose).
      2. Per-bone ratios (scaled reference / anchor frame) are computed once
         with compute_ratios_stepwise.
      3. For each frame of a deep copy of `tpl_dwposes`, child joints are
         shifted bone by bone so each bone length is multiplied by its
         ratio; the hand arrays move with their arm.
      4. If both the reference and the frame have a face, the frame's face
         is rescaled to the reference face proportions with its nose tip
         placed on body keypoint 0.

    Args:
        ref_dwpose: dwpose-like dict of the reference.
        tpl_dwposes: sequence of dwpose-like dicts (template frames).
        anchor_idx: index of the template frame the ratios are measured on.
            NOTE(review): used directly as an index, so the default `None`
            fails for a list — confirm callers always pass an integer.
        conf_th: confidence threshold forwarded to the helpers.

    Returns:
        The deep-copied, adjusted list of poses; `tpl_dwposes` itself is
        not modified.
    """
    detected_poses = copy.deepcopy(tpl_dwposes)

    best_pose = tpl_dwposes[anchor_idx]
    # Here the reference is the pose being moved; the anchor template frame is the target it is fitted to.
    ref_pose_scaled, _, _ = scale_and_translate_pose(ref_dwpose, best_pose, conf_th=conf_th)

    ref_candidate = ref_pose_scaled['bodies']['candidate'].astype(np.float32)
    ref_scores    = ref_pose_scaled['bodies']['score'].astype(np.float32)

    source_candidate = best_pose['bodies']['candidate'].astype(np.float32)
    source_scores = best_pose['bodies']['score'].astype(np.float32)

    # The reference face is optional; a face that cannot be converted disables face alignment.
    has_ref_face = 'faces' in ref_pose_scaled and ref_pose_scaled['faces'] is not None and ref_pose_scaled['faces'].size > 0
    if has_ref_face:
        try:
            ref68, _ = _to_68x2(ref_pose_scaled['faces'])
        except Exception as e:
            print("Reference face conversion failed:", e)
            has_ref_face = False

    ratios = compute_ratios_stepwise(ref_scores, source_scores, ref_candidate, source_candidate, conf_th=conf_th, th=1e-6)

    for pose in detected_poses:
        # Both arrays are edited in place below; order matters because each
        # step reads joint positions already moved by the previous steps.
        # NOTE(review): assumes 'hands' is an array of shape (2, K, 2) and is never None — confirm.
        candidate = pose['bodies']['candidate']
        hands = pose['hands']

        # ===== Neck =====
        # Joint 1 stays fixed; joint 0 and the head joints 14-17 move together.
        ratio = ratios[(0, 1)]
        x_offset = (candidate[1][0] - candidate[0][0]) * (1. - ratio)
        y_offset = (candidate[1][1] - candidate[0][1]) * (1. - ratio)
        candidate[[0, 14, 15, 16, 17], 0] += x_offset
        candidate[[0, 14, 15, 16, 17], 1] += y_offset

        # ===== Shoulder Right =====
        # NOTE(review): hands[1] moves with joints 2-4 here, while
        # aaposemeta_to_dwpose in this file stacks the right hand at index 0 —
        # confirm the hand ordering of the poses passed in.
        ratio = ratios[(1, 2)]
        x_offset = (candidate[1][0] - candidate[2][0]) * (1. - ratio)
        y_offset = (candidate[1][1] - candidate[2][1]) * (1. - ratio)
        candidate[[2, 3, 4], 0] += x_offset
        candidate[[2, 3, 4], 1] += y_offset
        hands[1, :, 0] += x_offset
        hands[1, :, 1] += y_offset

        # ===== Shoulder Left =====
        ratio = ratios[(1, 5)]
        x_offset = (candidate[1][0] - candidate[5][0]) * (1. - ratio)
        y_offset = (candidate[1][1] - candidate[5][1]) * (1. - ratio)
        candidate[[5, 6, 7], 0] += x_offset
        candidate[[5, 6, 7], 1] += y_offset
        hands[0, :, 0] += x_offset
        hands[0, :, 1] += y_offset

        # ===== Upper Arm Right =====
        ratio = ratios[(2, 3)]
        x_offset = (candidate[2][0] - candidate[3][0]) * (1. - ratio)
        y_offset = (candidate[2][1] - candidate[3][1]) * (1. - ratio)
        candidate[[3, 4], 0] += x_offset
        candidate[[3, 4], 1] += y_offset
        hands[1, :, 0] += x_offset
        hands[1, :, 1] += y_offset

        # ===== Forearm Right =====
        ratio = ratios[(3, 4)]
        x_offset = (candidate[3][0] - candidate[4][0]) * (1. - ratio)
        y_offset = (candidate[3][1] - candidate[4][1]) * (1. - ratio)
        candidate[4, 0] += x_offset
        candidate[4, 1] += y_offset
        hands[1, :, 0] += x_offset
        hands[1, :, 1] += y_offset

        # ===== Upper Arm Left =====
        ratio = ratios[(5, 6)]
        x_offset = (candidate[5][0] - candidate[6][0]) * (1. - ratio)
        y_offset = (candidate[5][1] - candidate[6][1]) * (1. - ratio)
        candidate[[6, 7], 0] += x_offset
        candidate[[6, 7], 1] += y_offset
        hands[0, :, 0] += x_offset
        hands[0, :, 1] += y_offset

        # ===== Forearm Left =====
        ratio = ratios[(6, 7)]
        x_offset = (candidate[6][0] - candidate[7][0]) * (1. - ratio)
        y_offset = (candidate[6][1] - candidate[7][1]) * (1. - ratio)
        candidate[7, 0] += x_offset
        candidate[7, 1] += y_offset
        hands[0, :, 0] += x_offset
        hands[0, :, 1] += y_offset

        # ===== Head parts =====
        # Only the child joint p2 of each pair is moved; because (0,14) and
        # (0,15) come first, the (14,16) and (15,17) steps see the updated
        # positions of joints 14 and 15.
        for (p1, p2) in [(0,14),(0,15),(14,16),(15,17)]:
            ratio = ratios[(p1,p2)]
            x_offset = (candidate[p1][0] - candidate[p2][0]) * (1. - ratio)
            y_offset = (candidate[p1][1] - candidate[p2][1]) * (1. - ratio)
            candidate[p2, 0] += x_offset
            candidate[p2, 1] += y_offset

        # ===== Hips (added) =====
        # Only joints 8 and 11 themselves are shifted here; the leg joints
        # below them are not carried along by this step.
        ratio = ratios[(1, 8)]
        x_offset = (candidate[1][0] - candidate[8][0]) * (1. - ratio)
        y_offset = (candidate[1][1] - candidate[8][1]) * (1. - ratio)
        candidate[8, 0] += x_offset
        candidate[8, 1] += y_offset

        ratio = ratios[(1, 11)]
        x_offset = (candidate[1][0] - candidate[11][0]) * (1. - ratio)
        y_offset = (candidate[1][1] - candidate[11][1]) * (1. - ratio)
        candidate[11, 0] += x_offset
        candidate[11, 1] += y_offset

        # ===== Legs =====
        # Same adjustment as above, written as (child - parent) * (ratio - 1).
        ratio = ratios[(8, 9)]
        x_offset = (candidate[9][0] - candidate[8][0]) * (ratio - 1.)
        y_offset = (candidate[9][1] - candidate[8][1]) * (ratio - 1.)
        candidate[[9, 10], 0] += x_offset
        candidate[[9, 10], 1] += y_offset

        ratio = ratios[(9, 10)]
        x_offset = (candidate[10][0] - candidate[9][0]) * (ratio - 1.)
        y_offset = (candidate[10][1] - candidate[9][1]) * (ratio - 1.)
        candidate[10, 0] += x_offset
        candidate[10, 1] += y_offset

        ratio = ratios[(11, 12)]
        x_offset = (candidate[12][0] - candidate[11][0]) * (ratio - 1.)
        y_offset = (candidate[12][1] - candidate[11][1]) * (ratio - 1.)
        candidate[[12, 13], 0] += x_offset
        candidate[[12, 13], 1] += y_offset

        ratio = ratios[(12, 13)]
        x_offset = (candidate[13][0] - candidate[12][0]) * (ratio - 1.)
        y_offset = (candidate[13][1] - candidate[12][1]) * (ratio - 1.)
        candidate[13, 0] += x_offset
        candidate[13, 1] += y_offset

        # rough align
        # The face is rescaled to the reference proportions and its nose tip
        # is placed on the already-adjusted body keypoint 0.
        if has_ref_face and 'faces' in pose and pose['faces'] is not None and pose['faces'].size > 0:
            try:
                src68, to_orig = _to_68x2(pose['faces'])
                scaled68 = _face_scale_only(src68, ref68, candidate[0], alpha=1.0)
                pose['faces'] = to_orig(scaled68)
            except Exception as e:
                # Best effort: a frame whose face cannot be processed keeps
                # its original face. NOTE(review): the message says
                # "Reference face" although the failure is in this frame's face.
                print("Reference face conversion failed:", e)
                continue

    return detected_poses