# jiggle-physics / pose.py
# Author: Justin Wood
# Commit 289e92a: Add two-photo workflow — MediaPipe pose detection + /api/pose endpoint
import sys
import numpy as np
from PIL import Image
def log(msg: str) -> None:
    """Print a progress message and flush immediately so logs stream in real time."""
    # flush=True already flushes stdout; the extra sys.stdout.flush() the
    # original carried was redundant and has been dropped.
    print(msg, flush=True)
# MediaPipe Pose landmark indices that anchor each simulated body region.
REGION_LANDMARKS = {
    "breast_left": [11, 12, 23, 24],  # shoulders + hips (torso box)
    "breast_right": [11, 12, 23, 24],
    "buttocks": [23, 24, 25, 26],  # hips + knees
    "ponytail": [0, 11, 12],  # nose + shoulders
    "hair": [0, 11, 12],
}
# Union of every landmark index any region uses, in ascending order.
ALL_NEEDED = sorted({idx for group in REGION_LANDMARKS.values() for idx in group})
# Lazily-built singleton; MediaPipe model load is slow, so do it at most once.
_pose_cache = None


def get_pose():
    """Return the process-wide MediaPipe Pose detector, creating it on first use."""
    global _pose_cache
    if _pose_cache is not None:
        return _pose_cache
    log("[Pose] Loading MediaPipe Pose ...")
    # Imported here so the module stays importable without mediapipe installed.
    import mediapipe as mp

    _pose_cache = mp.solutions.pose.Pose(
        static_image_mode=True,
        model_complexity=2,
        enable_segmentation=False,
    )
    log("[Pose] MediaPipe Pose ready.")
    return _pose_cache
def detect_landmarks(image: Image.Image) -> dict:
    """
    Run MediaPipe Pose on *image* and extract the landmarks in ALL_NEEDED.

    Returns {landmark_index: {"x", "y", "z", "visibility", "px", "py"}} where
    x/y are normalized [0,1] relative to image size, z is MediaPipe's relative
    depth, and px/py are pixel coordinates. Returns {} when no pose is found.
    """
    # NOTE: the original body did `import mediapipe as mp` here but never used
    # `mp` — the detector comes from get_pose() — so the import was removed.
    pose = get_pose()
    # MediaPipe expects an RGB uint8 array.
    img_rgb = np.array(image.convert("RGB"))
    log(f"[Pose] Running pose estimation on {image.size} image ...")
    results = pose.process(img_rgb)
    if not results.pose_landmarks:
        log("[Pose] No pose landmarks detected.")
        return {}
    W, H = image.size
    landmarks = {}
    for idx in ALL_NEEDED:
        lm = results.pose_landmarks.landmark[idx]
        landmarks[idx] = {
            "x": lm.x,  # normalized [0,1]
            "y": lm.y,  # normalized [0,1]
            "z": lm.z,  # relative depth
            "visibility": lm.visibility,
            "px": lm.x * W,  # pixel coords
            "py": lm.y * H,
        }
    log(f"[Pose] Detected {len(landmarks)} landmarks.")
    return landmarks
def compute_region_transform(
    tpose_lm: dict,
    target_lm: dict,
    region: str,
) -> dict:
    """
    Estimate how a body region moves from the T-pose photo to the target
    photo: a uniform scale, a translation, and a shoulder-line rotation
    that map T-pose landmark positions onto target landmark positions.

    Returns {"scale": float, "tx": float, "ty": float, "rotation": float},
    all in normalized [0,1] image space (rotation in radians).
    """
    indices = REGION_LANDMARKS.get(region, [11, 12, 23, 24])
    shared = [i for i in indices if i in tpose_lm and i in target_lm]
    if len(shared) < 2:
        # Not enough landmarks — return identity
        return {"scale": 1.0, "tx": 0.0, "ty": 0.0, "rotation": 0.0}

    src = np.array([[tpose_lm[i]["x"], tpose_lm[i]["y"]] for i in shared])
    dst = np.array([[target_lm[i]["x"], target_lm[i]["y"]] for i in shared])

    # Centroid of the landmark group in each image.
    src_cx, src_cy = src.mean(axis=0)
    dst_cx, dst_cy = dst.mean(axis=0)

    # Scale: ratio of point spreads (std along x plus std along y),
    # clamped away from zero so the division is always safe.
    src_spread = max(src[:, 0].std() + src[:, 1].std(), 1e-6)
    dst_spread = max(dst[:, 0].std() + dst[:, 1].std(), 1e-6)
    scale = dst_spread / src_spread

    # Translation: the scaled T-pose centroid lands on the target centroid.
    tx = dst_cx - src_cx * scale
    ty = dst_cy - src_cy * scale

    # Rotation: difference of shoulder-line angles, when both shoulders exist.
    rotation = 0.0
    if 11 in shared and 12 in shared:

        def shoulder_angle(lm):
            # Angle of the left→right shoulder vector (landmarks 11 → 12).
            return np.arctan2(lm[12]["y"] - lm[11]["y"], lm[12]["x"] - lm[11]["x"])

        rotation = float(shoulder_angle(target_lm) - shoulder_angle(tpose_lm))

    return {"scale": float(scale), "tx": float(tx), "ty": float(ty), "rotation": rotation}