File size: 3,940 Bytes
289e92a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import sys
import numpy as np
from PIL import Image


def log(msg: str) -> None:
    """Print *msg* to stdout and flush immediately.

    Flushing matters when stdout is piped (e.g. a subprocess whose output
    is streamed by a parent process) so progress messages appear promptly.
    """
    # print(..., flush=True) already flushes stdout; the previous extra
    # sys.stdout.flush() call was redundant and has been removed.
    print(msg, flush=True)


# Mapping of body-region name -> MediaPipe Pose landmark indices used to
# anchor that region (shoulders=11/12, hips=23/24, knees=25/26, nose=0).
REGION_LANDMARKS = {
    "breast_left":  [11, 12, 23, 24],   # torso box: shoulders + hips
    "breast_right": [11, 12, 23, 24],
    "buttocks":     [23, 24, 25, 26],   # hips + knees
    "ponytail":     [0,  11, 12],       # nose + shoulders
    "hair":         [0,  11, 12],
}

# Deduplicated, ascending list of every landmark index any region needs.
ALL_NEEDED = sorted({idx for group in REGION_LANDMARKS.values() for idx in group})


# Module-level singleton: the MediaPipe Pose estimator is expensive to
# construct, so it is built once and reused across calls.
_pose_cache = None


def get_pose():
    """Return the shared MediaPipe Pose estimator, constructing it lazily.

    The first call imports mediapipe and builds the model (slow); every
    subsequent call returns the cached instance.
    """
    global _pose_cache
    if _pose_cache is not None:
        return _pose_cache

    log("[Pose] Loading MediaPipe Pose ...")
    import mediapipe as mp  # deferred: heavy import, only needed here

    _pose_cache = mp.solutions.pose.Pose(
        static_image_mode=True,
        model_complexity=2,
        enable_segmentation=False,
    )
    log("[Pose] MediaPipe Pose ready.")
    return _pose_cache


def detect_landmarks(image: Image.Image) -> dict:
    """Run MediaPipe Pose on *image* and extract the landmarks we care about.

    Parameters
    ----------
    image : PIL.Image.Image
        Input image; converted to RGB before inference.

    Returns
    -------
    dict
        {landmark_index: {"x", "y", "z", "visibility", "px", "py"}} for each
        index in ALL_NEEDED. "x"/"y" are normalized [0,1]; "px"/"py" are the
        same coordinates in pixels; "z" is MediaPipe's relative depth.
        Empty dict if no pose was detected.
    """
    # NOTE: the previous `import mediapipe as mp` here was dead code —
    # the estimator comes from get_pose(), which does its own import.
    pose = get_pose()

    img_rgb = np.array(image.convert("RGB"))
    log(f"[Pose] Running pose estimation on {image.size} image ...")
    results = pose.process(img_rgb)

    if not results.pose_landmarks:
        log("[Pose] No pose landmarks detected.")
        return {}

    W, H = image.size
    landmarks = {}
    for idx in ALL_NEEDED:
        lm = results.pose_landmarks.landmark[idx]
        landmarks[idx] = {
            "x": lm.x,          # normalized [0,1]
            "y": lm.y,          # normalized [0,1]
            "z": lm.z,          # relative depth
            "visibility": lm.visibility,
            "px": lm.x * W,     # pixel coords
            "py": lm.y * H,
        }

    log(f"[Pose] Detected {len(landmarks)} landmarks.")
    return landmarks


def compute_region_transform(
    tpose_lm: dict,
    target_lm: dict,
    region: str,
) -> dict:
    """
    Compute a similarity transform (scale + translation) mapping
    T-pose landmark positions → target image landmark positions for a given region.

    Returns {"scale": float, "tx": float, "ty": float}
    All values in normalized [0,1] image space.
    """
    indices = REGION_LANDMARKS.get(region, [11, 12, 23, 24])
    available = [i for i in indices if i in tpose_lm and i in target_lm]

    if len(available) < 2:
        # Not enough landmarks — return identity
        return {"scale": 1.0, "tx": 0.0, "ty": 0.0, "rotation": 0.0}

    # Centroid of landmark group in each image
    tp_xs = [tpose_lm[i]["x"] for i in available]
    tp_ys = [tpose_lm[i]["y"] for i in available]
    tg_xs = [target_lm[i]["x"] for i in available]
    tg_ys = [target_lm[i]["y"] for i in available]

    tp_cx, tp_cy = np.mean(tp_xs), np.mean(tp_ys)
    tg_cx, tg_cy = np.mean(tg_xs), np.mean(tg_ys)

    # Scale: ratio of bounding box sizes
    tp_spread = max(np.std(tp_xs) + np.std(tp_ys), 1e-6)
    tg_spread = max(np.std(tg_xs) + np.std(tg_ys), 1e-6)
    scale = tg_spread / tp_spread

    # Translation: move T-pose centroid to target centroid (after scaling)
    tx = tg_cx - tp_cx * scale
    ty = tg_cy - tp_cy * scale

    # Rotation: angle between shoulder vectors (if both shoulders available)
    rotation = 0.0
    if 11 in available and 12 in available:
        tp_angle = np.arctan2(
            tpose_lm[12]["y"] - tpose_lm[11]["y"],
            tpose_lm[12]["x"] - tpose_lm[11]["x"],
        )
        tg_angle = np.arctan2(
            target_lm[12]["y"] - target_lm[11]["y"],
            target_lm[12]["x"] - target_lm[11]["x"],
        )
        rotation = float(tg_angle - tp_angle)

    return {"scale": float(scale), "tx": float(tx), "ty": float(ty), "rotation": rotation}