"""MEI (Motion Euclidean-Invariant) representation. Converts between SMPL-H parameters and the 138-dimensional MEI vector, which provides SE(2) invariance (ground-plane translation + yaw rotation). MEI vector layout (138D): [0:2] CoM planar velocity in local frame (v_x, v_z) 2D [2:3] Heading angular velocity (d_theta / dt) 1D [3:6] Root position relative to CoM pivot, local frame 3D [6:12] Root rotation (heading-relative), 6D continuous 6D [12:138] Body joint rotations, 21 joints x 6D 126D --------------------------------------------------------------- Total 138D Coordinate convention (local frame, after heading alignment): X-axis: right Y-axis: up Z-axis: forward (heading direction) """ import numpy as np from .geometry import ( axis_angle_to_matrix, matrix_to_axis_angle, matrix_to_rotation_6d, rotation_6d_to_matrix, yaw_rotation_matrix, wrap_angle, ) MEI_DIM = 138 NUM_JOINTS = 22 # root + 21 body joints # SMPL 22-joint names SMPL_JOINT_NAMES = [ "Pelvis", "L_Hip", "R_Hip", "Spine1", "L_Knee", "R_Knee", "Spine2", "L_Ankle", "R_Ankle", "Spine3", "L_Foot", "R_Foot", "Neck", "L_Collar", "R_Collar", "Head", "L_Shoulder", "R_Shoulder", "L_Elbow", "R_Elbow", "L_Wrist", "R_Wrist", ] # Approximate body-segment mass ratios for CoM computation. # Adapted from de Leva (1996) for the SMPL 22-joint topology. # Each weight approximates the fraction of total body mass best # represented by the corresponding joint position. # _MASS_WEIGHTS = np.array([ # 0.142, # 0 Pelvis # 0.100, # 1 L_Hip # 0.100, # 2 R_Hip # 0.094, # 3 Spine1 # 0.047, # 4 L_Knee # 0.047, # 5 R_Knee # 0.094, # 6 Spine2 # 0.014, # 7 L_Ankle # 0.014, # 8 R_Ankle # 0.047, # 9 Spine3 # 0.014, # 10 L_Foot # 0.014, # 11 R_Foot # 0.028, # 12 Neck # 0.019, # 13 L_Collar # 0.019, # 14 R_Collar # 0.066, # 15 Head # 0.028, # 16 L_Shoulder # 0.028, # 17 R_Shoulder # 0.019, # 18 L_Elbow # 0.019, # 19 R_Elbow # 0.006, # 20 L_Wrist # 0.006, # 21 R_Wrist # ], dtype=np.float64) _MASS_WEIGHTS = np.array([ 1, # 0 Pelvis 0, # 1 L_Hip 0, # 2 R_Hip 0, # 3 Spine1 0, # 4 L_Knee 0, # 5 R_Knee 0, # 6 Spine2 0, # 7 L_Ankle 0, # 8 R_Ankle 0, # 9 Spine3 0, # 10 L_Foot 0, # 11 R_Foot 0, # 12 Neck 0, # 13 L_Collar 0, # 14 R_Collar 0, # 15 Head 0, # 16 L_Shoulder 0, # 17 R_Shoulder 0, # 18 L_Elbow 0, # 19 R_Elbow 0, # 20 L_Wrist 0, # 21 R_Wrist ], dtype=np.float64) JOINT_MASS_WEIGHTS = _MASS_WEIGHTS / _MASS_WEIGHTS.sum() # ------------------------------------------------------------------ # Helper functions # ------------------------------------------------------------------ def compute_com(joints: np.ndarray) -> np.ndarray: """Compute center of mass from 22-joint positions. Args: joints: (T, 22, 3) world-frame joint positions. Returns: com: (T, 3) center of mass. """ w = JOINT_MASS_WEIGHTS.reshape(1, -1, 1) # (1, 22, 1) return (joints * w).sum(axis=1) def compute_com_ground(joints: np.ndarray) -> np.ndarray: """CoM projected onto the ground plane (Y = 0). Args: joints: (T, 22, 3) world-frame joint positions. Returns: com_ground: (T, 3) with Y component set to 0. """ com = compute_com(joints) com[:, 1] = 0.0 return com def compute_heading(joints: np.ndarray) -> np.ndarray: """Extract heading angle from joint positions (HumanML3D convention). The heading (facing direction) is determined from the average of hip and shoulder lateral (left-to-right) vectors, cross-producted with the Y-up axis to obtain the forward direction in the XZ plane. To avoid 180-degree flips, each frame's heading is compared with the previous frame and the closer of {heading, heading + pi} is kept. Args: joints: (T, 22, 3) world-frame joint positions. Returns: heading: (T,) heading angles in radians. """ r_hip = SMPL_JOINT_NAMES.index("R_Hip") l_hip = SMPL_JOINT_NAMES.index("L_Hip") sdr_r = SMPL_JOINT_NAMES.index("R_Shoulder") sdr_l = SMPL_JOINT_NAMES.index("L_Shoulder") # Lateral vectors (left -> right) across1 = joints[:, r_hip] - joints[:, l_hip] # (T, 3) across2 = joints[:, sdr_r] - joints[:, sdr_l] # (T, 3) across = across1 + across2 # (T, 3) # Forward = Y_up x right_direction (result lies in XZ plane, Y=0) # Do NOT normalize across beforehand: ||forward|| = ||across|| * sin(theta) # naturally captures both degeneracies (across ≈ 0 and across ≈ vertical). forward = np.cross(np.array([[0, 1, 0]]), across, axis=-1) # (T, 3) xz_norm = np.linalg.norm(forward, axis=-1) # (T,) forward = forward / np.maximum(xz_norm[:, np.newaxis], 1e-12) raw_heading = np.arctan2(forward[:, 0], forward[:, 2]) # Resolve ambiguities frame by frame heading = np.empty_like(raw_heading) heading[0] = raw_heading[0] for t in range(1, len(heading)): # If across is nearly vertical, cross product is degenerate -> keep previous if xz_norm[t] < 1e-3: heading[t] = heading[t - 1] continue h = raw_heading[t] h_flip = h + np.pi # Pick whichever is closer to previous frame (resolve 180 flip) diff_h = abs(wrap_angle(h - heading[t - 1])) diff_flip = abs(wrap_angle(h_flip - heading[t - 1])) heading[t] = h if diff_h <= diff_flip else wrap_angle(h_flip) return heading # ------------------------------------------------------------------ # Encoding: SMPL-H -> MEI # ------------------------------------------------------------------ def smplh_to_mei( root_orient: np.ndarray, body_pose: np.ndarray, joints: np.ndarray, fps: float = 30.0, ) -> np.ndarray: """Convert SMPL-H parameters to MEI representation. Args: root_orient: (T, 3) root rotation in axis-angle. body_pose: (T, 21, 3) or (T, 63) body joint rotations in axis-angle. joints: (T, 22, 3) world-frame 3D joint positions (from SMPL-H FK). joints[:, 0] is the pelvis (root) world position. fps: frame rate (default 30). Returns: mei: (T, 138) MEI representation. """ T = root_orient.shape[0] if body_pose.ndim == 2 and body_pose.shape[-1] == 63: body_pose = body_pose.reshape(T, 21, 3) # 1. Heading from joint positions (HumanML3D convention) heading = compute_heading(joints) # (T,) # 2. CoM ground projection com_ground = compute_com_ground(joints) # (T, 3) # 3. Heading angular velocity (backward difference) heading_vel = np.zeros(T) if T > 1: heading_vel[0] = wrap_angle(heading[1] - heading[0]) * fps # forward difference for first frame heading_vel[1:] = wrap_angle(heading[1:] - heading[:-1]) * fps # 4. CoM planar velocity in local frame (backward diff) R_heading = yaw_rotation_matrix(heading) # (T, 3, 3) R_inv = np.transpose(R_heading, (0, 2, 1)) # (T, 3, 3) com_vel_global = np.zeros_like(com_ground) if T > 1: com_vel_global[0] = (com_ground[1] - com_ground[0]) * fps # forward difference for first frame com_vel_global[1:] = (com_ground[1:] - com_ground[:-1]) * fps com_vel_local = np.einsum("tij,tj->ti", R_inv, com_vel_global) # (T, 3) com_vel_planar = com_vel_local[:, [0, 2]] # (T, 2) [v_x, v_z] # 5. Root position relative to CoM pivot, in local frame root_offset_global = joints[:, 0] - com_ground # (T, 3) root_offset_local = np.einsum("tij,tj->ti", R_inv, root_offset_global) # 6. Root rotation relative to heading, encoded as 6D root_rotmat = axis_angle_to_matrix(root_orient) # (T, 3, 3) root_rotmat_local = np.einsum("tij,tjk->tik", R_inv, root_rotmat) root_rot6d = matrix_to_rotation_6d(root_rotmat_local) # (T, 6) # 7. Body joint rotations -> 6D (already in parent-local frame) body_rotmat = axis_angle_to_matrix(body_pose) # (T, 21, 3, 3) body_rot6d = matrix_to_rotation_6d(body_rotmat).reshape(T, -1) # (T, 126) # Assemble mei = np.concatenate([ com_vel_planar, # 0:2 (2) heading_vel[:, None], # 2:3 (1) root_offset_local, # 3:6 (3) root_rot6d, # 6:12 (6) body_rot6d, # 12:138 (126) ], axis=-1) assert mei.shape == (T, MEI_DIM), f"Expected ({T}, {MEI_DIM}), got {mei.shape}" return mei # ------------------------------------------------------------------ # Decoding: MEI -> SMPL-H # ------------------------------------------------------------------ def mei_to_smplh( mei: np.ndarray, fps: float = 30.0, initial_heading: float = 0.0, initial_com: np.ndarray = np.array([0.0, 0.0]) ) -> dict: """Convert MEI representation back to SMPL-H parameters. Because MEI is SE(2)-invariant, the absolute ground-plane position and yaw are lost. They are recovered from *initial_heading* and *initial_com* (both default to zero, placing the motion at the origin facing +Z). Args: mei: (T, 138) MEI representation. fps: frame rate (default 30). initial_heading: heading angle at frame 0 (radians). initial_com: (2,) XZ position of CoM at frame 0, default [0, 0]. Returns: dict with: root_orient: (T, 3) axis-angle root rotation. body_pose: (T, 63) axis-angle body joint rotations. transl: (T, 3) global root translation. """ T = mei.shape[0] # --- Parse --- com_vel_planar = mei[:, 0:2] # (T, 2) heading_vel = mei[:, 2] # (T,) root_offset_local = mei[:, 3:6] # (T, 3) root_rot6d = mei[:, 6:12] # (T, 6) body_rot6d = mei[:, 12:138] # (T, 126) # 1. Integrate heading angular velocity heading = np.zeros(T) heading[0] = initial_heading for t in range(1, T): heading[t] = heading[t - 1] + heading_vel[t] / fps heading = wrap_angle(heading) # 2. Integrate CoM ground trajectory R_heading = yaw_rotation_matrix(heading) # (T, 3, 3) com_vel_local_3d = np.zeros((T, 3)) com_vel_local_3d[:, 0] = com_vel_planar[:, 0] com_vel_local_3d[:, 2] = com_vel_planar[:, 1] com_vel_global = np.einsum("tij,tj->ti", R_heading, com_vel_local_3d) com_ground = np.zeros((T, 3)) com_ground[0, 0] = initial_com[0] com_ground[0, 2] = initial_com[1] for t in range(1, T): com_ground[t] = com_ground[t - 1] + com_vel_global[t] / fps # 3. Root position (global) root_offset_global = np.einsum("tij,tj->ti", R_heading, root_offset_local) transl = com_ground + root_offset_global # 4. Root rotation (global) root_rotmat_local = rotation_6d_to_matrix(root_rot6d) # (T, 3, 3) root_rotmat = np.einsum("tij,tjk->tik", R_heading, root_rotmat_local) root_orient = matrix_to_axis_angle(root_rotmat) # (T, 3) # 5. Body joint rotations -> axis-angle body_rotmat = rotation_6d_to_matrix(body_rot6d.reshape(T, 21, 6)) body_pose = matrix_to_axis_angle(body_rotmat).reshape(T, 63) return { "root_orient": root_orient, "body_pose": body_pose, "transl": transl, }