# jiggle-physics / pose.py
# Author: Justin Wood
# Commit 289e92a: Add two-photo workflow — MediaPipe pose detection + /api/pose endpoint
import sys
import numpy as np
from PIL import Image
def log(msg: str) -> None:
    """Print a progress message and flush immediately so logs stream in real time."""
    # flush=True already flushes stdout; the extra sys.stdout.flush() the
    # original carried was redundant and has been dropped.
    print(msg, flush=True)
# MediaPipe Pose landmark indices that anchor each simulated body region.
REGION_LANDMARKS = {
    "breast_left": [11, 12, 23, 24],  # shoulders + hips (torso box)
    "breast_right": [11, 12, 23, 24],
    "buttocks": [23, 24, 25, 26],  # hips + knees
    "ponytail": [0, 11, 12],  # nose + shoulders
    "hair": [0, 11, 12],
}
# Union of every landmark index any region uses, in ascending order.
ALL_NEEDED = sorted({idx for group in REGION_LANDMARKS.values() for idx in group})
# Lazily-built singleton; MediaPipe model load is slow, so do it at most once.
_pose_cache = None


def get_pose():
    """Return the process-wide MediaPipe Pose detector, creating it on first use."""
    global _pose_cache
    if _pose_cache is not None:
        return _pose_cache
    log("[Pose] Loading MediaPipe Pose ...")
    # Imported here so the module stays importable without mediapipe installed.
    import mediapipe as mp

    _pose_cache = mp.solutions.pose.Pose(
        static_image_mode=True,
        model_complexity=2,
        enable_segmentation=False,
    )
    log("[Pose] MediaPipe Pose ready.")
    return _pose_cache
def detect_landmarks(image: Image.Image) -> dict:
    """
    Run MediaPipe Pose on *image* and extract the landmarks in ALL_NEEDED.

    Returns {landmark_index: {"x", "y", "z", "visibility", "px", "py"}} where
    x/y are normalized [0,1] relative to image size, z is MediaPipe's relative
    depth, and px/py are pixel coordinates. Returns {} when no pose is found.
    """
    # NOTE: the original body did `import mediapipe as mp` here but never used
    # `mp` — the detector comes from get_pose() — so the import was removed.
    pose = get_pose()
    # MediaPipe expects an RGB uint8 array.
    img_rgb = np.array(image.convert("RGB"))
    log(f"[Pose] Running pose estimation on {image.size} image ...")
    results = pose.process(img_rgb)
    if not results.pose_landmarks:
        log("[Pose] No pose landmarks detected.")
        return {}
    W, H = image.size
    landmarks = {}
    for idx in ALL_NEEDED:
        lm = results.pose_landmarks.landmark[idx]
        landmarks[idx] = {
            "x": lm.x,  # normalized [0,1]
            "y": lm.y,  # normalized [0,1]
            "z": lm.z,  # relative depth
            "visibility": lm.visibility,
            "px": lm.x * W,  # pixel coords
            "py": lm.y * H,
        }
    log(f"[Pose] Detected {len(landmarks)} landmarks.")
    return landmarks
def compute_region_transform(
    tpose_lm: dict,
    target_lm: dict,
    region: str,
) -> dict:
    """
    Estimate how a body region moves from the T-pose photo to the target
    photo: a uniform scale, a translation, and a shoulder-line rotation
    that map T-pose landmark positions onto target landmark positions.

    Returns {"scale": float, "tx": float, "ty": float, "rotation": float},
    all in normalized [0,1] image space (rotation in radians).
    """
    indices = REGION_LANDMARKS.get(region, [11, 12, 23, 24])
    shared = [i for i in indices if i in tpose_lm and i in target_lm]
    if len(shared) < 2:
        # Not enough landmarks — return identity
        return {"scale": 1.0, "tx": 0.0, "ty": 0.0, "rotation": 0.0}

    src = np.array([[tpose_lm[i]["x"], tpose_lm[i]["y"]] for i in shared])
    dst = np.array([[target_lm[i]["x"], target_lm[i]["y"]] for i in shared])

    # Centroid of the landmark group in each image.
    src_cx, src_cy = src.mean(axis=0)
    dst_cx, dst_cy = dst.mean(axis=0)

    # Scale: ratio of point spreads (std along x plus std along y),
    # clamped away from zero so the division is always safe.
    src_spread = max(src[:, 0].std() + src[:, 1].std(), 1e-6)
    dst_spread = max(dst[:, 0].std() + dst[:, 1].std(), 1e-6)
    scale = dst_spread / src_spread

    # Translation: the scaled T-pose centroid lands on the target centroid.
    tx = dst_cx - src_cx * scale
    ty = dst_cy - src_cy * scale

    # Rotation: difference of shoulder-line angles, when both shoulders exist.
    rotation = 0.0
    if 11 in shared and 12 in shared:

        def shoulder_angle(lm):
            # Angle of the left→right shoulder vector (landmarks 11 → 12).
            return np.arctan2(lm[12]["y"] - lm[11]["y"], lm[12]["x"] - lm[11]["x"])

        rotation = float(shoulder_angle(target_lm) - shoulder_angle(tpose_lm))

    return {"scale": float(scale), "tx": float(tx), "ty": float(ty), "rotation": rotation}