import sys

import numpy as np
from PIL import Image


def log(msg: str) -> None:
    """Print *msg* immediately; flush=True already forces the flush."""
    # Fix: the old version also called sys.stdout.flush() right after
    # print(..., flush=True), which is redundant — print has already flushed.
    print(msg, flush=True)


# MediaPipe landmark indices relevant to each body region.
# Indices follow the MediaPipe Pose topology:
# 0 = nose, 11/12 = shoulders, 23/24 = hips, 25/26 = knees.
REGION_LANDMARKS = {
    "breast_left": [11, 12, 23, 24],  # shoulders + hips (torso box)
    "breast_right": [11, 12, 23, 24],
    "buttocks": [23, 24, 25, 26],  # hips + knees
    "ponytail": [0, 11, 12],  # nose + shoulders
    "hair": [0, 11, 12],
}

# All landmarks needed across any region.
ALL_NEEDED = sorted({i for v in REGION_LANDMARKS.values() for i in v})

# Lazily-created singleton Pose model (loading it is slow, so do it once).
_pose_cache = None


def get_pose():
    """Return the shared MediaPipe Pose instance, creating it on first use.

    NOTE(review): the cached instance is reused across calls and is not
    guaranteed thread-safe — confirm single-threaded use by callers.
    """
    global _pose_cache
    if _pose_cache is None:
        log("[Pose] Loading MediaPipe Pose ...")
        import mediapipe as mp  # deferred: heavy import, only needed here

        _pose_cache = mp.solutions.pose.Pose(
            static_image_mode=True,
            model_complexity=2,
            enable_segmentation=False,
        )
        log("[Pose] MediaPipe Pose ready.")
    return _pose_cache


def detect_landmarks(image: Image.Image) -> dict:
    """
    Run MediaPipe Pose on image.

    Returns {landmark_index: {"x": float, "y": float, "z": float,
                              "visibility": float, "px": float, "py": float}}
    Coordinates x,y are normalized [0,1] relative to image size; px,py are
    the same positions in pixel units. Returns {} when no pose is detected.
    """
    # Fix: the old version imported mediapipe here but never used it —
    # the model comes from get_pose(); the dead import is removed.
    pose = get_pose()
    img_rgb = np.array(image.convert("RGB"))
    log(f"[Pose] Running pose estimation on {image.size} image ...")
    results = pose.process(img_rgb)

    if not results.pose_landmarks:
        log("[Pose] No pose landmarks detected.")
        return {}

    W, H = image.size
    landmarks = {}
    # Only the indices any region actually consumes are extracted.
    for idx in ALL_NEEDED:
        lm = results.pose_landmarks.landmark[idx]
        landmarks[idx] = {
            "x": lm.x,  # normalized [0,1]
            "y": lm.y,  # normalized [0,1]
            "z": lm.z,  # relative depth
            "visibility": lm.visibility,
            "px": lm.x * W,  # pixel coords
            "py": lm.y * H,
        }
    log(f"[Pose] Detected {len(landmarks)} landmarks.")
    return landmarks


def compute_region_transform(
    tpose_lm: dict,
    target_lm: dict,
    region: str,
) -> dict:
    """
    Compute a similarity transform (scale + translation + rotation) mapping
    T-pose landmark positions → target image landmark positions for a given
    region.

    Returns {"scale": float, "tx": float, "ty": float, "rotation": float}
    (docstring fix: the old one omitted the "rotation" key, which is always
    present). scale/tx/ty are in normalized [0,1] image space; rotation is
    in radians, derived from the shoulder line when both shoulders are in
    the region's landmark set (otherwise 0.0). Falls back to the torso box
    for an unknown region and to the identity transform when fewer than two
    landmarks are available in both images.
    """
    indices = REGION_LANDMARKS.get(region, [11, 12, 23, 24])
    available = [i for i in indices if i in tpose_lm and i in target_lm]

    if len(available) < 2:
        # Not enough landmarks — return identity
        return {"scale": 1.0, "tx": 0.0, "ty": 0.0, "rotation": 0.0}

    # Centroid of landmark group in each image
    tp_xs = [tpose_lm[i]["x"] for i in available]
    tp_ys = [tpose_lm[i]["y"] for i in available]
    tg_xs = [target_lm[i]["x"] for i in available]
    tg_ys = [target_lm[i]["y"] for i in available]

    tp_cx, tp_cy = np.mean(tp_xs), np.mean(tp_ys)
    tg_cx, tg_cy = np.mean(tg_xs), np.mean(tg_ys)

    # Scale: ratio of landmark spreads (sum of per-axis std devs); the 1e-6
    # floor guards against division by zero when points coincide.
    tp_spread = max(np.std(tp_xs) + np.std(tp_ys), 1e-6)
    tg_spread = max(np.std(tg_xs) + np.std(tg_ys), 1e-6)
    scale = tg_spread / tp_spread

    # Translation: move T-pose centroid to target centroid (after scaling
    # about the origin, hence the `* scale` on the source centroid).
    tx = tg_cx - tp_cx * scale
    ty = tg_cy - tp_cy * scale

    # Rotation: angle between shoulder vectors (if both shoulders available)
    rotation = 0.0
    if 11 in available and 12 in available:
        tp_angle = np.arctan2(
            tpose_lm[12]["y"] - tpose_lm[11]["y"],
            tpose_lm[12]["x"] - tpose_lm[11]["x"],
        )
        tg_angle = np.arctan2(
            target_lm[12]["y"] - target_lm[11]["y"],
            target_lm[12]["x"] - target_lm[11]["x"],
        )
        rotation = float(tg_angle - tp_angle)

    return {
        "scale": float(scale),
        "tx": float(tx),
        "ty": float(ty),
        "rotation": rotation,
    }