FloodDiffusion-MEI / visualization /MEI138 /representation.py
H-Liu1997's picture
Upload visualization/MEI138/representation.py with huggingface_hub
191d2cc verified
"""MEI (Motion Euclidean-Invariant) representation.
Converts between SMPL-H parameters and the 138-dimensional MEI vector,
which provides SE(2) invariance (ground-plane translation + yaw rotation).
MEI vector layout (138D):
[0:2] CoM planar velocity in local frame (v_x, v_z) 2D
[2:3] Heading angular velocity (d_theta / dt) 1D
[3:6] Root position relative to CoM pivot, local frame 3D
[6:12] Root rotation (heading-relative), 6D continuous 6D
[12:138] Body joint rotations, 21 joints x 6D 126D
---------------------------------------------------------------
Total 138D
Coordinate convention (local frame, after heading alignment):
X-axis: right
Y-axis: up
Z-axis: forward (heading direction)
"""
import numpy as np
from .geometry import (
axis_angle_to_matrix,
matrix_to_axis_angle,
matrix_to_rotation_6d,
rotation_6d_to_matrix,
yaw_rotation_matrix,
wrap_angle,
)
MEI_DIM = 138
NUM_JOINTS = 22 # root + 21 body joints
# SMPL 22-joint names
SMPL_JOINT_NAMES = [
"Pelvis", "L_Hip", "R_Hip", "Spine1",
"L_Knee", "R_Knee", "Spine2", "L_Ankle",
"R_Ankle", "Spine3", "L_Foot", "R_Foot",
"Neck", "L_Collar", "R_Collar", "Head",
"L_Shoulder", "R_Shoulder", "L_Elbow", "R_Elbow",
"L_Wrist", "R_Wrist",
]
# Approximate body-segment mass ratios for CoM computation.
# Adapted from de Leva (1996) for the SMPL 22-joint topology.
# Each weight approximates the fraction of total body mass best
# represented by the corresponding joint position.
# _MASS_WEIGHTS = np.array([
# 0.142, # 0 Pelvis
# 0.100, # 1 L_Hip
# 0.100, # 2 R_Hip
# 0.094, # 3 Spine1
# 0.047, # 4 L_Knee
# 0.047, # 5 R_Knee
# 0.094, # 6 Spine2
# 0.014, # 7 L_Ankle
# 0.014, # 8 R_Ankle
# 0.047, # 9 Spine3
# 0.014, # 10 L_Foot
# 0.014, # 11 R_Foot
# 0.028, # 12 Neck
# 0.019, # 13 L_Collar
# 0.019, # 14 R_Collar
# 0.066, # 15 Head
# 0.028, # 16 L_Shoulder
# 0.028, # 17 R_Shoulder
# 0.019, # 18 L_Elbow
# 0.019, # 19 R_Elbow
# 0.006, # 20 L_Wrist
# 0.006, # 21 R_Wrist
# ], dtype=np.float64)
_MASS_WEIGHTS = np.array([
1, # 0 Pelvis
0, # 1 L_Hip
0, # 2 R_Hip
0, # 3 Spine1
0, # 4 L_Knee
0, # 5 R_Knee
0, # 6 Spine2
0, # 7 L_Ankle
0, # 8 R_Ankle
0, # 9 Spine3
0, # 10 L_Foot
0, # 11 R_Foot
0, # 12 Neck
0, # 13 L_Collar
0, # 14 R_Collar
0, # 15 Head
0, # 16 L_Shoulder
0, # 17 R_Shoulder
0, # 18 L_Elbow
0, # 19 R_Elbow
0, # 20 L_Wrist
0, # 21 R_Wrist
], dtype=np.float64)
JOINT_MASS_WEIGHTS = _MASS_WEIGHTS / _MASS_WEIGHTS.sum()
# ------------------------------------------------------------------
# Helper functions
# ------------------------------------------------------------------
def compute_com(joints: np.ndarray) -> np.ndarray:
"""Compute center of mass from 22-joint positions.
Args:
joints: (T, 22, 3) world-frame joint positions.
Returns:
com: (T, 3) center of mass.
"""
w = JOINT_MASS_WEIGHTS.reshape(1, -1, 1) # (1, 22, 1)
return (joints * w).sum(axis=1)
def compute_com_ground(joints: np.ndarray) -> np.ndarray:
"""CoM projected onto the ground plane (Y = 0).
Args:
joints: (T, 22, 3) world-frame joint positions.
Returns:
com_ground: (T, 3) with Y component set to 0.
"""
com = compute_com(joints)
com[:, 1] = 0.0
return com
def compute_heading(joints: np.ndarray) -> np.ndarray:
"""Extract heading angle from joint positions (HumanML3D convention).
The heading (facing direction) is determined from the average of
hip and shoulder lateral (left-to-right) vectors, cross-producted
with the Y-up axis to obtain the forward direction in the XZ plane.
To avoid 180-degree flips, each frame's heading is compared with
the previous frame and the closer of {heading, heading + pi} is kept.
Args:
joints: (T, 22, 3) world-frame joint positions.
Returns:
heading: (T,) heading angles in radians.
"""
r_hip = SMPL_JOINT_NAMES.index("R_Hip")
l_hip = SMPL_JOINT_NAMES.index("L_Hip")
sdr_r = SMPL_JOINT_NAMES.index("R_Shoulder")
sdr_l = SMPL_JOINT_NAMES.index("L_Shoulder")
# Lateral vectors (left -> right)
across1 = joints[:, r_hip] - joints[:, l_hip] # (T, 3)
across2 = joints[:, sdr_r] - joints[:, sdr_l] # (T, 3)
across = across1 + across2 # (T, 3)
# Forward = Y_up x right_direction (result lies in XZ plane, Y=0)
# Do NOT normalize across beforehand: ||forward|| = ||across|| * sin(theta)
# naturally captures both degeneracies (across ≈ 0 and across ≈ vertical).
forward = np.cross(np.array([[0, 1, 0]]), across, axis=-1) # (T, 3)
xz_norm = np.linalg.norm(forward, axis=-1) # (T,)
forward = forward / np.maximum(xz_norm[:, np.newaxis], 1e-12)
raw_heading = np.arctan2(forward[:, 0], forward[:, 2])
# Resolve ambiguities frame by frame
heading = np.empty_like(raw_heading)
heading[0] = raw_heading[0]
for t in range(1, len(heading)):
# If across is nearly vertical, cross product is degenerate -> keep previous
if xz_norm[t] < 1e-3:
heading[t] = heading[t - 1]
continue
h = raw_heading[t]
h_flip = h + np.pi
# Pick whichever is closer to previous frame (resolve 180 flip)
diff_h = abs(wrap_angle(h - heading[t - 1]))
diff_flip = abs(wrap_angle(h_flip - heading[t - 1]))
heading[t] = h if diff_h <= diff_flip else wrap_angle(h_flip)
return heading
# ------------------------------------------------------------------
# Encoding: SMPL-H -> MEI
# ------------------------------------------------------------------
def smplh_to_mei(
root_orient: np.ndarray,
body_pose: np.ndarray,
joints: np.ndarray,
fps: float = 30.0,
) -> np.ndarray:
"""Convert SMPL-H parameters to MEI representation.
Args:
root_orient: (T, 3) root rotation in axis-angle.
body_pose: (T, 21, 3) or (T, 63) body joint rotations in axis-angle.
joints: (T, 22, 3) world-frame 3D joint positions (from SMPL-H FK).
joints[:, 0] is the pelvis (root) world position.
fps: frame rate (default 30).
Returns:
mei: (T, 138) MEI representation.
"""
T = root_orient.shape[0]
if body_pose.ndim == 2 and body_pose.shape[-1] == 63:
body_pose = body_pose.reshape(T, 21, 3)
# 1. Heading from joint positions (HumanML3D convention)
heading = compute_heading(joints) # (T,)
# 2. CoM ground projection
com_ground = compute_com_ground(joints) # (T, 3)
# 3. Heading angular velocity (backward difference)
heading_vel = np.zeros(T)
if T > 1:
heading_vel[0] = wrap_angle(heading[1] - heading[0]) * fps # forward difference for first frame
heading_vel[1:] = wrap_angle(heading[1:] - heading[:-1]) * fps
# 4. CoM planar velocity in local frame (backward diff)
R_heading = yaw_rotation_matrix(heading) # (T, 3, 3)
R_inv = np.transpose(R_heading, (0, 2, 1)) # (T, 3, 3)
com_vel_global = np.zeros_like(com_ground)
if T > 1:
com_vel_global[0] = (com_ground[1] - com_ground[0]) * fps # forward difference for first frame
com_vel_global[1:] = (com_ground[1:] - com_ground[:-1]) * fps
com_vel_local = np.einsum("tij,tj->ti", R_inv, com_vel_global) # (T, 3)
com_vel_planar = com_vel_local[:, [0, 2]] # (T, 2) [v_x, v_z]
# 5. Root position relative to CoM pivot, in local frame
root_offset_global = joints[:, 0] - com_ground # (T, 3)
root_offset_local = np.einsum("tij,tj->ti", R_inv, root_offset_global)
# 6. Root rotation relative to heading, encoded as 6D
root_rotmat = axis_angle_to_matrix(root_orient) # (T, 3, 3)
root_rotmat_local = np.einsum("tij,tjk->tik", R_inv, root_rotmat)
root_rot6d = matrix_to_rotation_6d(root_rotmat_local) # (T, 6)
# 7. Body joint rotations -> 6D (already in parent-local frame)
body_rotmat = axis_angle_to_matrix(body_pose) # (T, 21, 3, 3)
body_rot6d = matrix_to_rotation_6d(body_rotmat).reshape(T, -1) # (T, 126)
# Assemble
mei = np.concatenate([
com_vel_planar, # 0:2 (2)
heading_vel[:, None], # 2:3 (1)
root_offset_local, # 3:6 (3)
root_rot6d, # 6:12 (6)
body_rot6d, # 12:138 (126)
], axis=-1)
assert mei.shape == (T, MEI_DIM), f"Expected ({T}, {MEI_DIM}), got {mei.shape}"
return mei
# ------------------------------------------------------------------
# Decoding: MEI -> SMPL-H
# ------------------------------------------------------------------
def mei_to_smplh(
mei: np.ndarray,
fps: float = 30.0,
initial_heading: float = 0.0,
initial_com: np.ndarray = np.array([0.0, 0.0])
) -> dict:
"""Convert MEI representation back to SMPL-H parameters.
Because MEI is SE(2)-invariant, the absolute ground-plane position and
yaw are lost. They are recovered from *initial_heading* and
*initial_com* (both default to zero, placing the motion at the origin
facing +Z).
Args:
mei: (T, 138) MEI representation.
fps: frame rate (default 30).
initial_heading: heading angle at frame 0 (radians).
initial_com: (2,) XZ position of CoM at frame 0, default [0, 0].
Returns:
dict with:
root_orient: (T, 3) axis-angle root rotation.
body_pose: (T, 63) axis-angle body joint rotations.
transl: (T, 3) global root translation.
"""
T = mei.shape[0]
# --- Parse ---
com_vel_planar = mei[:, 0:2] # (T, 2)
heading_vel = mei[:, 2] # (T,)
root_offset_local = mei[:, 3:6] # (T, 3)
root_rot6d = mei[:, 6:12] # (T, 6)
body_rot6d = mei[:, 12:138] # (T, 126)
# 1. Integrate heading angular velocity
heading = np.zeros(T)
heading[0] = initial_heading
for t in range(1, T):
heading[t] = heading[t - 1] + heading_vel[t] / fps
heading = wrap_angle(heading)
# 2. Integrate CoM ground trajectory
R_heading = yaw_rotation_matrix(heading) # (T, 3, 3)
com_vel_local_3d = np.zeros((T, 3))
com_vel_local_3d[:, 0] = com_vel_planar[:, 0]
com_vel_local_3d[:, 2] = com_vel_planar[:, 1]
com_vel_global = np.einsum("tij,tj->ti", R_heading, com_vel_local_3d)
com_ground = np.zeros((T, 3))
com_ground[0, 0] = initial_com[0]
com_ground[0, 2] = initial_com[1]
for t in range(1, T):
com_ground[t] = com_ground[t - 1] + com_vel_global[t] / fps
# 3. Root position (global)
root_offset_global = np.einsum("tij,tj->ti", R_heading, root_offset_local)
transl = com_ground + root_offset_global
# 4. Root rotation (global)
root_rotmat_local = rotation_6d_to_matrix(root_rot6d) # (T, 3, 3)
root_rotmat = np.einsum("tij,tjk->tik", R_heading, root_rotmat_local)
root_orient = matrix_to_axis_angle(root_rotmat) # (T, 3)
# 5. Body joint rotations -> axis-angle
body_rotmat = rotation_6d_to_matrix(body_rot6d.reshape(T, 21, 6))
body_pose = matrix_to_axis_angle(body_rotmat).reshape(T, 63)
return {
"root_orient": root_orient,
"body_pose": body_pose,
"transl": transl,
}