File size: 4,589 Bytes
fc3ca1b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 | """
utils/postprocess.py
ββββββββββββββββββββ
Optional post-processing utilities applied to raw MotionBERT output before
exporting. None of these are required β the pipeline works without them β
but they noticeably improve visual quality for real-world videos.
Functions
βββββββββ
smooth_poses(poses, window) β Gaussian temporal smoothing (removes jitter)
resample_poses(poses, src_fps, dst_fps) β Resample to a target frame rate
centre_trajectory(poses) β Root always starts at world origin
apply_floor(poses) β Push lowest foot Y to Y=0 (no ground clipping)
"""
from __future__ import annotations
import numpy as np
from scipy.ndimage import gaussian_filter1d
def smooth_poses(
poses: np.ndarray,
sigma: float = 1.5,
) -> np.ndarray:
"""
Apply Gaussian temporal smoothing to (T, N_joints, 3) pose data.
Parameters
----------
poses : (T, J, 3) float32
sigma : standard deviation of the Gaussian kernel (frames).
Larger = smoother but more lag. 1.0-2.5 is usually good.
Returns
-------
smoothed : (T, J, 3) float32
"""
# Smooth independently along the time axis (axis 0) for each joint & coord
return gaussian_filter1d(poses, sigma=sigma, axis=0).astype(np.float32)
def resample_poses(
poses: np.ndarray,
src_fps: float,
dst_fps: float,
) -> tuple[np.ndarray, float]:
"""
Resample poses from src_fps to dst_fps using linear interpolation.
Parameters
----------
poses : (T_src, J, 3)
src_fps : frames per second of the input
dst_fps : desired output frame rate
Returns
-------
resampled : (T_dst, J, 3)
dst_fps : actual output fps (same as input dst_fps)
"""
if abs(src_fps - dst_fps) < 0.01:
return poses, src_fps
T_src = poses.shape[0]
duration = (T_src - 1) / src_fps
T_dst = max(2, int(round(duration * dst_fps)) + 1)
src_times = np.linspace(0.0, duration, T_src)
dst_times = np.linspace(0.0, duration, T_dst)
# Interpolate each joint and coordinate independently
J = poses.shape[1]
out = np.zeros((T_dst, J, 3), dtype=np.float32)
for j in range(J):
for c in range(3):
out[:, j, c] = np.interp(dst_times, src_times, poses[:, j, c])
return out, float(dst_fps)
def centre_trajectory(poses: np.ndarray) -> np.ndarray:
"""
Translate the entire animation so that the root joint (joint 0 = Hips)
starts at XZ origin. Y is left unchanged (height is meaningful).
Parameters
----------
poses : (T, J, 3)
Returns
-------
centred : (T, J, 3)
"""
out = poses.copy()
offset = poses[0, 0, :].copy()
offset[1] = 0.0 # keep vertical component
out -= offset[None, None, :]
return out
def apply_floor(
poses: np.ndarray,
foot_joints: list[int] | None = None,
) -> np.ndarray:
"""
Shift the animation vertically so that the lowest foot position sits at Y=0.
Parameters
----------
poses : (T, J, 3) in metres, Y-up
foot_joints : joint indices treated as feet.
Defaults to H36M joints 3, 6 (R-ankle, L-ankle).
Returns
-------
floored : (T, J, 3)
"""
if foot_joints is None:
foot_joints = [3, 6] # H36M R-ankle, L-ankle
min_y = poses[:, foot_joints, 1].min()
if min_y < 0.0:
out = poses.copy()
out[:, :, 1] -= min_y
return out
return poses
def full_postprocess(
poses: np.ndarray,
fps: float,
*,
smooth_sigma: float = 1.5,
target_fps: float | None = None,
do_centre: bool = True,
do_floor: bool = True,
) -> tuple[np.ndarray, float]:
"""
Apply the full post-processing chain in the recommended order.
Parameters
----------
poses : (T, 17, 3) raw MotionBERT output in metres
fps : source frame rate
smooth_sigma : temporal smoothing strength (0 = off)
target_fps : if set, resample to this FPS after smoothing
do_centre : translate root start to XZ origin
do_floor : push lowest foot to Y=0
Returns
-------
(processed_poses, effective_fps)
"""
p = poses.copy()
if smooth_sigma > 0.0:
p = smooth_poses(p, sigma=smooth_sigma)
if target_fps is not None and abs(target_fps - fps) > 0.01:
p, fps = resample_poses(p, fps, target_fps)
if do_centre:
p = centre_trajectory(p)
if do_floor:
p = apply_floor(p)
return p, fps
|