File size: 3,940 Bytes
289e92a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import sys
import numpy as np
from PIL import Image


def log(msg: str) -> None:
    """Print *msg* to stdout and flush immediately.

    Flushing matters when stdout is piped (e.g. a subprocess whose output
    is streamed by a parent process) so progress messages appear promptly.
    """
    # print(..., flush=True) already flushes stdout; the previous extra
    # sys.stdout.flush() call was redundant and has been removed.
    print(msg, flush=True)


# Mapping of body-region name -> MediaPipe Pose landmark indices used to
# anchor that region (shoulders=11/12, hips=23/24, knees=25/26, nose=0).
REGION_LANDMARKS = {
    "breast_left":  [11, 12, 23, 24],   # torso box: shoulders + hips
    "breast_right": [11, 12, 23, 24],
    "buttocks":     [23, 24, 25, 26],   # hips + knees
    "ponytail":     [0,  11, 12],       # nose + shoulders
    "hair":         [0,  11, 12],
}

# Deduplicated, ascending list of every landmark index any region needs.
ALL_NEEDED = sorted({idx for group in REGION_LANDMARKS.values() for idx in group})


# Module-level singleton: the MediaPipe Pose estimator is expensive to
# construct, so it is built once and reused across calls.
_pose_cache = None


def get_pose():
    """Return the shared MediaPipe Pose estimator, constructing it lazily.

    The first call imports mediapipe and builds the model (slow); every
    subsequent call returns the cached instance.
    """
    global _pose_cache
    if _pose_cache is not None:
        return _pose_cache

    log("[Pose] Loading MediaPipe Pose ...")
    import mediapipe as mp  # deferred: heavy import, only needed here

    _pose_cache = mp.solutions.pose.Pose(
        static_image_mode=True,
        model_complexity=2,
        enable_segmentation=False,
    )
    log("[Pose] MediaPipe Pose ready.")
    return _pose_cache


def detect_landmarks(image: Image.Image) -> dict:
    """Run MediaPipe Pose on *image* and extract the landmarks we care about.

    Parameters
    ----------
    image : PIL.Image.Image
        Input image; converted to RGB before inference.

    Returns
    -------
    dict
        {landmark_index: {"x", "y", "z", "visibility", "px", "py"}} for each
        index in ALL_NEEDED. "x"/"y" are normalized [0,1]; "px"/"py" are the
        same coordinates in pixels; "z" is MediaPipe's relative depth.
        Empty dict if no pose was detected.
    """
    # NOTE: the previous `import mediapipe as mp` here was dead code —
    # the estimator comes from get_pose(), which does its own import.
    pose = get_pose()

    img_rgb = np.array(image.convert("RGB"))
    log(f"[Pose] Running pose estimation on {image.size} image ...")
    results = pose.process(img_rgb)

    if not results.pose_landmarks:
        log("[Pose] No pose landmarks detected.")
        return {}

    W, H = image.size
    landmarks = {}
    for idx in ALL_NEEDED:
        lm = results.pose_landmarks.landmark[idx]
        landmarks[idx] = {
            "x": lm.x,          # normalized [0,1]
            "y": lm.y,          # normalized [0,1]
            "z": lm.z,          # relative depth
            "visibility": lm.visibility,
            "px": lm.x * W,     # pixel coords
            "py": lm.y * H,
        }

    log(f"[Pose] Detected {len(landmarks)} landmarks.")
    return landmarks


def compute_region_transform(
    tpose_lm: dict,
    target_lm: dict,
    region: str,
) -> dict:
    """
    Compute a similarity transform (scale + translation) mapping
    T-pose landmark positions → target image landmark positions for a given region.

    Returns {"scale": float, "tx": float, "ty": float}
    All values in normalized [0,1] image space.
    """
    indices = REGION_LANDMARKS.get(region, [11, 12, 23, 24])
    available = [i for i in indices if i in tpose_lm and i in target_lm]

    if len(available) < 2:
        # Not enough landmarks — return identity
        return {"scale": 1.0, "tx": 0.0, "ty": 0.0, "rotation": 0.0}

    # Centroid of landmark group in each image
    tp_xs = [tpose_lm[i]["x"] for i in available]
    tp_ys = [tpose_lm[i]["y"] for i in available]
    tg_xs = [target_lm[i]["x"] for i in available]
    tg_ys = [target_lm[i]["y"] for i in available]

    tp_cx, tp_cy = np.mean(tp_xs), np.mean(tp_ys)
    tg_cx, tg_cy = np.mean(tg_xs), np.mean(tg_ys)

    # Scale: ratio of bounding box sizes
    tp_spread = max(np.std(tp_xs) + np.std(tp_ys), 1e-6)
    tg_spread = max(np.std(tg_xs) + np.std(tg_ys), 1e-6)
    scale = tg_spread / tp_spread

    # Translation: move T-pose centroid to target centroid (after scaling)
    tx = tg_cx - tp_cx * scale
    ty = tg_cy - tp_cy * scale

    # Rotation: angle between shoulder vectors (if both shoulders available)
    rotation = 0.0
    if 11 in available and 12 in available:
        tp_angle = np.arctan2(
            tpose_lm[12]["y"] - tpose_lm[11]["y"],
            tpose_lm[12]["x"] - tpose_lm[11]["x"],
        )
        tg_angle = np.arctan2(
            target_lm[12]["y"] - target_lm[11]["y"],
            target_lm[12]["x"] - target_lm[11]["x"],
        )
        rotation = float(tg_angle - tp_angle)

    return {"scale": float(scale), "tx": float(tx), "ty": float(ty), "rotation": rotation}