# NOTE(review): removed "Spaces / Sleeping / Sleeping" page-status banner that
# the scrape captured here; it is not part of the module source.
| import sys | |
| import numpy as np | |
| from PIL import Image | |
def log(msg: str):
    """Print *msg* immediately (unbuffered), for progress reporting.

    ``print(..., flush=True)`` already flushes stdout, so no extra
    ``sys.stdout.flush()`` call is needed.
    """
    print(msg, flush=True)
# MediaPipe landmark indices relevant to each body region
REGION_LANDMARKS = {
    "breast_left": [11, 12, 23, 24],  # shoulders + hips (torso box)
    "breast_right": [11, 12, 23, 24],
    "buttocks": [23, 24, 25, 26],  # hips + knees
    "ponytail": [0, 11, 12],  # nose + shoulders
    "hair": [0, 11, 12],
}

# Union of every landmark index any region references, in ascending order.
ALL_NEEDED = sorted({idx for indices in REGION_LANDMARKS.values() for idx in indices})
# Lazily-created, process-wide MediaPipe Pose instance (loaded on first use).
_pose_cache = None


def get_pose():
    """Return the shared MediaPipe Pose detector, creating it on first call."""
    global _pose_cache
    if _pose_cache is not None:
        return _pose_cache
    log("[Pose] Loading MediaPipe Pose ...")
    import mediapipe as mp  # deferred: heavy import, only paid when pose is needed
    _pose_cache = mp.solutions.pose.Pose(
        static_image_mode=True,
        model_complexity=2,
        enable_segmentation=False,
    )
    log("[Pose] MediaPipe Pose ready.")
    return _pose_cache
def detect_landmarks(image: Image.Image) -> dict:
    """
    Run MediaPipe Pose on image.

    Returns {landmark_index: {"x": float, "y": float, "z": float,
    "visibility": float, "px": float, "py": float}} for each index in
    ALL_NEEDED, or {} when no pose is detected.
    Coordinates x,y are normalized [0,1] relative to image size; px,py are the
    same point in pixels; z is MediaPipe's relative depth estimate.
    """
    # NOTE: the previous version had an unused `import mediapipe as mp` here;
    # get_pose() performs the (lazy) mediapipe import itself.
    pose = get_pose()
    img_rgb = np.array(image.convert("RGB"))  # MediaPipe expects an RGB array
    log(f"[Pose] Running pose estimation on {image.size} image ...")
    results = pose.process(img_rgb)
    if not results.pose_landmarks:
        log("[Pose] No pose landmarks detected.")
        return {}
    W, H = image.size
    landmarks = {}
    for idx in ALL_NEEDED:
        lm = results.pose_landmarks.landmark[idx]
        landmarks[idx] = {
            "x": lm.x,  # normalized [0,1]
            "y": lm.y,  # normalized [0,1]
            "z": lm.z,  # relative depth
            "visibility": lm.visibility,
            "px": lm.x * W,  # pixel coords
            "py": lm.y * H,
        }
    log(f"[Pose] Detected {len(landmarks)} landmarks.")
    return landmarks
| def compute_region_transform( | |
| tpose_lm: dict, | |
| target_lm: dict, | |
| region: str, | |
| ) -> dict: | |
| """ | |
| Compute a similarity transform (scale + translation) mapping | |
| T-pose landmark positions → target image landmark positions for a given region. | |
| Returns {"scale": float, "tx": float, "ty": float} | |
| All values in normalized [0,1] image space. | |
| """ | |
| indices = REGION_LANDMARKS.get(region, [11, 12, 23, 24]) | |
| available = [i for i in indices if i in tpose_lm and i in target_lm] | |
| if len(available) < 2: | |
| # Not enough landmarks — return identity | |
| return {"scale": 1.0, "tx": 0.0, "ty": 0.0, "rotation": 0.0} | |
| # Centroid of landmark group in each image | |
| tp_xs = [tpose_lm[i]["x"] for i in available] | |
| tp_ys = [tpose_lm[i]["y"] for i in available] | |
| tg_xs = [target_lm[i]["x"] for i in available] | |
| tg_ys = [target_lm[i]["y"] for i in available] | |
| tp_cx, tp_cy = np.mean(tp_xs), np.mean(tp_ys) | |
| tg_cx, tg_cy = np.mean(tg_xs), np.mean(tg_ys) | |
| # Scale: ratio of bounding box sizes | |
| tp_spread = max(np.std(tp_xs) + np.std(tp_ys), 1e-6) | |
| tg_spread = max(np.std(tg_xs) + np.std(tg_ys), 1e-6) | |
| scale = tg_spread / tp_spread | |
| # Translation: move T-pose centroid to target centroid (after scaling) | |
| tx = tg_cx - tp_cx * scale | |
| ty = tg_cy - tp_cy * scale | |
| # Rotation: angle between shoulder vectors (if both shoulders available) | |
| rotation = 0.0 | |
| if 11 in available and 12 in available: | |
| tp_angle = np.arctan2( | |
| tpose_lm[12]["y"] - tpose_lm[11]["y"], | |
| tpose_lm[12]["x"] - tpose_lm[11]["x"], | |
| ) | |
| tg_angle = np.arctan2( | |
| target_lm[12]["y"] - target_lm[11]["y"], | |
| target_lm[12]["x"] - target_lm[11]["x"], | |
| ) | |
| rotation = float(tg_angle - tp_angle) | |
| return {"scale": float(scale), "tx": float(tx), "ty": float(ty), "rotation": rotation} | |