File size: 11,826 Bytes
191d2cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
"""MEI (Motion Euclidean-Invariant) representation.

Converts between SMPL-H parameters and the 138-dimensional MEI vector,
which provides SE(2) invariance (ground-plane translation + yaw rotation).

MEI vector layout (138D):
    [0:2]     CoM planar velocity in local frame (v_x, v_z)       2D
    [2:3]     Heading angular velocity (d_theta / dt)              1D
    [3:6]     Root position relative to CoM pivot, local frame     3D
    [6:12]    Root rotation (heading-relative), 6D continuous       6D
    [12:138]  Body joint rotations, 21 joints x 6D               126D
    ---------------------------------------------------------------
    Total                                                        138D

Coordinate convention (local frame, after heading alignment):
    X-axis: right
    Y-axis: up
    Z-axis: forward (heading direction)
"""

import numpy as np
from .geometry import (
    axis_angle_to_matrix,
    matrix_to_axis_angle,
    matrix_to_rotation_6d,
    rotation_6d_to_matrix,
    yaw_rotation_matrix,
    wrap_angle,
)

MEI_DIM = 138
NUM_JOINTS = 22       # root + 21 body joints

# SMPL 22-joint names
SMPL_JOINT_NAMES = [
    "Pelvis",     "L_Hip",      "R_Hip",      "Spine1",
    "L_Knee",     "R_Knee",     "Spine2",     "L_Ankle",
    "R_Ankle",    "Spine3",     "L_Foot",     "R_Foot",
    "Neck",       "L_Collar",   "R_Collar",   "Head",
    "L_Shoulder", "R_Shoulder", "L_Elbow",    "R_Elbow",
    "L_Wrist",    "R_Wrist",
]

# Approximate body-segment mass ratios for CoM computation.
# Adapted from de Leva (1996) for the SMPL 22-joint topology.
# Each weight approximates the fraction of total body mass best
# represented by the corresponding joint position.
# _MASS_WEIGHTS = np.array([
#     0.142,  # 0  Pelvis
#     0.100,  # 1  L_Hip
#     0.100,  # 2  R_Hip
#     0.094,  # 3  Spine1
#     0.047,  # 4  L_Knee
#     0.047,  # 5  R_Knee
#     0.094,  # 6  Spine2
#     0.014,  # 7  L_Ankle
#     0.014,  # 8  R_Ankle
#     0.047,  # 9  Spine3
#     0.014,  # 10 L_Foot
#     0.014,  # 11 R_Foot
#     0.028,  # 12 Neck
#     0.019,  # 13 L_Collar
#     0.019,  # 14 R_Collar
#     0.066,  # 15 Head
#     0.028,  # 16 L_Shoulder
#     0.028,  # 17 R_Shoulder
#     0.019,  # 18 L_Elbow
#     0.019,  # 19 R_Elbow
#     0.006,  # 20 L_Wrist
#     0.006,  # 21 R_Wrist
# ], dtype=np.float64)
_MASS_WEIGHTS = np.array([
    1,  # 0  Pelvis
    0,  # 1  L_Hip
    0,  # 2  R_Hip
    0,  # 3  Spine1
    0,  # 4  L_Knee
    0,  # 5  R_Knee
    0,  # 6  Spine2
    0,  # 7  L_Ankle
    0,  # 8  R_Ankle
    0,  # 9  Spine3
    0,  # 10 L_Foot
    0,  # 11 R_Foot
    0,  # 12 Neck
    0,  # 13 L_Collar
    0,  # 14 R_Collar
    0,  # 15 Head
    0,  # 16 L_Shoulder
    0,  # 17 R_Shoulder
    0,  # 18 L_Elbow
    0,  # 19 R_Elbow
    0,  # 20 L_Wrist
    0,  # 21 R_Wrist
], dtype=np.float64)
JOINT_MASS_WEIGHTS = _MASS_WEIGHTS / _MASS_WEIGHTS.sum()


# ------------------------------------------------------------------
# Helper functions
# ------------------------------------------------------------------

def compute_com(joints: np.ndarray) -> np.ndarray:
    """Compute center of mass from 22-joint positions.

    Args:
        joints: (T, 22, 3) world-frame joint positions.

    Returns:
        com: (T, 3) center of mass.
    """
    w = JOINT_MASS_WEIGHTS.reshape(1, -1, 1)  # (1, 22, 1)
    return (joints * w).sum(axis=1)


def compute_com_ground(joints: np.ndarray) -> np.ndarray:
    """CoM projected onto the ground plane (Y = 0).

    Args:
        joints: (T, 22, 3) world-frame joint positions.

    Returns:
        com_ground: (T, 3) with Y component set to 0.
    """
    com = compute_com(joints)
    com[:, 1] = 0.0
    return com


def compute_heading(joints: np.ndarray) -> np.ndarray:
    """Extract heading angle from joint positions (HumanML3D convention).

    The heading (facing direction) is determined from the average of
    hip and shoulder lateral (left-to-right) vectors, cross-producted
    with the Y-up axis to obtain the forward direction in the XZ plane.

    To avoid 180-degree flips, each frame's heading is compared with
    the previous frame and the closer of {heading, heading + pi} is kept.

    Args:
        joints: (T, 22, 3) world-frame joint positions.

    Returns:
        heading: (T,) heading angles in radians.
    """
    r_hip = SMPL_JOINT_NAMES.index("R_Hip")
    l_hip = SMPL_JOINT_NAMES.index("L_Hip")
    sdr_r = SMPL_JOINT_NAMES.index("R_Shoulder")
    sdr_l = SMPL_JOINT_NAMES.index("L_Shoulder")

    # Lateral vectors (left -> right)
    across1 = joints[:, r_hip] - joints[:, l_hip]      # (T, 3)
    across2 = joints[:, sdr_r] - joints[:, sdr_l]      # (T, 3)
    across = across1 + across2                           # (T, 3)

    # Forward = Y_up x right_direction (result lies in XZ plane, Y=0)
    # Do NOT normalize across beforehand: ||forward|| = ||across|| * sin(theta)
    # naturally captures both degeneracies (across ≈ 0 and across ≈ vertical).
    forward = np.cross(np.array([[0, 1, 0]]), across, axis=-1)  # (T, 3)
    xz_norm = np.linalg.norm(forward, axis=-1)                  # (T,)
    forward = forward / np.maximum(xz_norm[:, np.newaxis], 1e-12)

    raw_heading = np.arctan2(forward[:, 0], forward[:, 2])

    # Resolve ambiguities frame by frame
    heading = np.empty_like(raw_heading)
    heading[0] = raw_heading[0]
    for t in range(1, len(heading)):
        # If across is nearly vertical, cross product is degenerate -> keep previous
        if xz_norm[t] < 1e-3:
            heading[t] = heading[t - 1]
            continue
        h = raw_heading[t]
        h_flip = h + np.pi
        # Pick whichever is closer to previous frame (resolve 180 flip)
        diff_h = abs(wrap_angle(h - heading[t - 1]))
        diff_flip = abs(wrap_angle(h_flip - heading[t - 1]))
        heading[t] = h if diff_h <= diff_flip else wrap_angle(h_flip)
    return heading


# ------------------------------------------------------------------
# Encoding: SMPL-H -> MEI
# ------------------------------------------------------------------

def smplh_to_mei(
    root_orient: np.ndarray,
    body_pose: np.ndarray,
    joints: np.ndarray,
    fps: float = 30.0,
) -> np.ndarray:
    """Convert SMPL-H parameters to MEI representation.

    Args:
        root_orient: (T, 3) root rotation in axis-angle.
        body_pose:   (T, 21, 3) or (T, 63) body joint rotations in axis-angle.
        joints:      (T, 22, 3) world-frame 3D joint positions (from SMPL-H FK).
                     joints[:, 0] is the pelvis (root) world position.
        fps:         frame rate (default 30).

    Returns:
        mei: (T, 138) MEI representation.
    """
    T = root_orient.shape[0]
    if body_pose.ndim == 2 and body_pose.shape[-1] == 63:
        body_pose = body_pose.reshape(T, 21, 3)

    # 1. Heading from joint positions (HumanML3D convention)
    heading = compute_heading(joints)             # (T,)

    # 2. CoM ground projection
    com_ground = compute_com_ground(joints)       # (T, 3)

    # 3. Heading angular velocity (backward difference)
    heading_vel = np.zeros(T)
    if T > 1:
        heading_vel[0] = wrap_angle(heading[1] - heading[0]) * fps  # forward difference for first frame
        heading_vel[1:] = wrap_angle(heading[1:] - heading[:-1]) * fps

    # 4. CoM planar velocity in local frame (backward diff)
    R_heading = yaw_rotation_matrix(heading)      # (T, 3, 3)
    R_inv = np.transpose(R_heading, (0, 2, 1))   # (T, 3, 3)

    com_vel_global = np.zeros_like(com_ground)
    if T > 1:
        com_vel_global[0] = (com_ground[1] - com_ground[0]) * fps  # forward difference for first frame
        com_vel_global[1:] = (com_ground[1:] - com_ground[:-1]) * fps

    com_vel_local = np.einsum("tij,tj->ti", R_inv, com_vel_global)  # (T, 3)
    com_vel_planar = com_vel_local[:, [0, 2]]     # (T, 2)  [v_x, v_z]

    # 5. Root position relative to CoM pivot, in local frame
    root_offset_global = joints[:, 0] - com_ground  # (T, 3)
    root_offset_local = np.einsum("tij,tj->ti", R_inv, root_offset_global)

    # 6. Root rotation relative to heading, encoded as 6D
    root_rotmat = axis_angle_to_matrix(root_orient)               # (T, 3, 3)
    root_rotmat_local = np.einsum("tij,tjk->tik", R_inv, root_rotmat)
    root_rot6d = matrix_to_rotation_6d(root_rotmat_local)         # (T, 6)

    # 7. Body joint rotations -> 6D (already in parent-local frame)
    body_rotmat = axis_angle_to_matrix(body_pose)                 # (T, 21, 3, 3)
    body_rot6d = matrix_to_rotation_6d(body_rotmat).reshape(T, -1)  # (T, 126)

    # Assemble
    mei = np.concatenate([
        com_vel_planar,             # 0:2    (2)
        heading_vel[:, None],       # 2:3    (1)
        root_offset_local,          # 3:6    (3)
        root_rot6d,                 # 6:12   (6)
        body_rot6d,                 # 12:138 (126)
    ], axis=-1)

    assert mei.shape == (T, MEI_DIM), f"Expected ({T}, {MEI_DIM}), got {mei.shape}"
    return mei


# ------------------------------------------------------------------
# Decoding: MEI -> SMPL-H
# ------------------------------------------------------------------

def mei_to_smplh(
    mei: np.ndarray,
    fps: float = 30.0,
    initial_heading: float = 0.0,
    initial_com: np.ndarray = np.array([0.0, 0.0])
) -> dict:
    """Convert MEI representation back to SMPL-H parameters.

    Because MEI is SE(2)-invariant, the absolute ground-plane position and
    yaw are lost.  They are recovered from *initial_heading* and
    *initial_com* (both default to zero, placing the motion at the origin
    facing +Z).

    Args:
        mei:             (T, 138) MEI representation.
        fps:             frame rate (default 30).
        initial_heading: heading angle at frame 0 (radians).
        initial_com:     (2,) XZ position of CoM at frame 0, default [0, 0].

    Returns:
        dict with:
            root_orient: (T, 3)  axis-angle root rotation.
            body_pose:   (T, 63) axis-angle body joint rotations.
            transl:      (T, 3)  global root translation.
    """
    T = mei.shape[0]

    # --- Parse ---
    com_vel_planar    = mei[:, 0:2]       # (T, 2)
    heading_vel       = mei[:, 2]         # (T,)
    root_offset_local = mei[:, 3:6]       # (T, 3)
    root_rot6d        = mei[:, 6:12]      # (T, 6)
    body_rot6d        = mei[:, 12:138]    # (T, 126)

    # 1. Integrate heading angular velocity
    heading = np.zeros(T)
    heading[0] = initial_heading
    for t in range(1, T):
        heading[t] = heading[t - 1] + heading_vel[t] / fps
    heading = wrap_angle(heading)

    # 2. Integrate CoM ground trajectory
    R_heading = yaw_rotation_matrix(heading)  # (T, 3, 3)

    com_vel_local_3d = np.zeros((T, 3))
    com_vel_local_3d[:, 0] = com_vel_planar[:, 0]
    com_vel_local_3d[:, 2] = com_vel_planar[:, 1]

    com_vel_global = np.einsum("tij,tj->ti", R_heading, com_vel_local_3d)

    com_ground = np.zeros((T, 3))
    com_ground[0, 0] = initial_com[0]
    com_ground[0, 2] = initial_com[1]
    for t in range(1, T):
        com_ground[t] = com_ground[t - 1] + com_vel_global[t] / fps

    # 3. Root position (global)
    root_offset_global = np.einsum("tij,tj->ti", R_heading, root_offset_local)
    transl = com_ground + root_offset_global

    # 4. Root rotation (global)
    root_rotmat_local = rotation_6d_to_matrix(root_rot6d)        # (T, 3, 3)
    root_rotmat = np.einsum("tij,tjk->tik", R_heading, root_rotmat_local)
    root_orient = matrix_to_axis_angle(root_rotmat)              # (T, 3)

    # 5. Body joint rotations -> axis-angle
    body_rotmat = rotation_6d_to_matrix(body_rot6d.reshape(T, 21, 6))
    body_pose = matrix_to_axis_angle(body_rotmat).reshape(T, 63)

    return {
        "root_orient": root_orient,
        "body_pose": body_pose,
        "transl": transl,
    }