zirobtc committed on
Commit
69b50e2
·
verified ·
1 Parent(s): a50b63b

Upload demo_motionstreamer.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. demo_motionstreamer.py +58 -88
demo_motionstreamer.py CHANGED
@@ -12,76 +12,34 @@ from utils.face_z_align_util import rotation_6d_to_matrix, matrix_to_axis_angle,
12
  from sentence_transformers import SentenceTransformer
13
  warnings.filterwarnings('ignore')
14
 
 
15
  def save_motion_as_bvh(motion_data, output_path, fps=30):
16
- """
17
- Saves a denormalized 272-dim motion numpy array to a BVH file.
18
- """
19
  print(f"--- Starting direct conversion to BVH: {os.path.basename(output_path)} ---")
20
  try:
21
- if isinstance(motion_data, torch.Tensor):
22
- motion_data = motion_data.detach().cpu().numpy()
23
- if motion_data.ndim == 3 and motion_data.shape[0] == 1:
24
- motion_data = motion_data.squeeze(0)
25
- elif motion_data.ndim != 2:
26
- raise ValueError(f"Input motion data must be 2D, but got shape {motion_data.shape}")
27
-
28
- njoint = 22
29
- nfrm, _ = motion_data.shape
30
-
31
  rotations_matrix = rotation_6d_to_matrix(torch.from_numpy(motion_data[:, 8+6*njoint : 8+12*njoint]).reshape(nfrm, -1, 6)).numpy()
32
-
33
  global_heading_diff_rot_6d = torch.from_numpy(motion_data[:, 2:8])
34
  global_heading_diff_rot = rotation_6d_to_matrix(global_heading_diff_rot_6d).numpy()
35
- global_heading_rot = np.zeros_like(global_heading_diff_rot)
36
- global_heading_rot[0] = global_heading_diff_rot[0]
37
- for i in range(1, nfrm):
38
- global_heading_rot[i] = np.matmul(global_heading_diff_rot[i], global_heading_rot[i-1])
39
-
40
- velocities_root_xy = motion_data[:, :2]
41
- height = motion_data[:, 8 : 8+3*njoint].reshape(nfrm, -1, 3)[:, 0, 1]
42
-
43
- inv_global_heading_rot = np.transpose(global_heading_rot, (0, 2, 1))
44
- rotations_matrix[:, 0, ...] = np.matmul(inv_global_heading_rot, rotations_matrix[:, 0, ...])
45
-
46
- velocities_root_xyz = np.zeros((nfrm, 3))
47
- velocities_root_xyz[:, 0] = velocities_root_xy[:, 0]
48
- velocities_root_xyz[:, 2] = velocities_root_xy[:, 1]
49
  velocities_root_xyz[1:, :] = np.matmul(inv_global_heading_rot[:-1], velocities_root_xyz[1:, :, None]).squeeze(-1)
50
- root_translation = np.cumsum(velocities_root_xyz, axis=0)
51
- root_translation[:, 1] = height
52
-
53
- axis_angle = matrix_to_axis_angle(torch.from_numpy(rotations_matrix)).numpy().reshape(nfrm, -1)
54
- poses_24_joints = np.zeros((nfrm, 72))
55
- poses_24_joints[:, :66] = axis_angle
56
-
57
- model = smplx.create(model_path="body_models/human_model_files", model_type="smpl", gender="NEUTRAL")
58
- parents = model.parents.detach().cpu().numpy()
59
- rest_pose = model().joints.detach().cpu().numpy().squeeze()[:24,:]
60
- offsets = rest_pose - rest_pose[parents]
61
- offsets[0] = np.array([0,0,0])
62
-
63
- rotations_quat = axis_angle_to_quaternion(torch.from_numpy(poses_24_joints.reshape(-1, 24, 3))).numpy()
64
- rotations_euler = np.degrees(quat.to_euler(rotations_quat, order="zyx"))
65
- positions = np.zeros_like(rotations_quat[..., :3])
66
- positions[:, 0] = root_translation
67
-
68
- joint_names = [
69
- "Pelvis", "Left_hip", "Right_hip", "Spine1", "Left_knee", "Right_knee", "Spine2",
70
- "Left_ankle", "Right_ankle", "Spine3", "Left_foot", "Right_foot", "Neck",
71
- "Left_collar", "Right_collar", "Head", "Left_shoulder", "Right_shoulder",
72
- "Left_elbow", "Right_elbow", "Left_wrist", "Right_wrist", "Left_hand", "Right_hand"
73
- ]
74
-
75
- bvh.save(output_path, {
76
- "rotations": rotations_euler, "positions": positions, "offsets": offsets,
77
- "parents": parents, "names": joint_names, "order": "zyx", "frametime": 1.0 / fps,
78
- })
79
  print(f"✅ BVH file saved successfully to {output_path}")
80
-
81
  except Exception as e:
82
- print(f"❌ BVH Conversion Failed. Error: {e}")
83
- import traceback
84
- traceback.print_exc()
85
 
86
 
87
  if __name__ == '__main__':
@@ -102,56 +60,68 @@ if __name__ == '__main__':
102
  latent_dim=16, clip_range=[-30, 20]
103
  )
104
  tae_ckpt = torch.load('Causal_TAE_t2m_babel/net_last.pth', map_location='cpu')
105
- # The TAE checkpoint has weights stored under the 'net' key
106
- tae_net.load_state_dict(tae_ckpt['net'], strict=True)
107
  tae_net.eval()
108
  tae_net.to(comp_device)
109
 
110
  config = LLaMAHFConfig.from_name('Normal_size')
111
  config.block_size = 78
112
  trans_encoder = LLaMAHF(config, args.num_diffusion_head_layers, args.latent_dim, comp_device)
113
- print("Loading your trained MotionStreamer checkpoint from 'Experiments/motionstreamer_model/latest.pth'...")
114
- trans_ckpt = torch.load('Experiments/motionstreamer_model/latest.pth', map_location='cpu')
115
- trans_encoder.load_state_dict(trans_ckpt['trans'], strict=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  trans_encoder.eval()
117
  trans_encoder.to(comp_device)
118
 
 
119
  print("Loading mean/std from BABEL dataset...")
120
  mean = np.load('babel_272/t2m_babel_mean_std/Mean.npy')
121
  std = np.load('babel_272/t2m_babel_mean_std/Std.npy')
122
 
123
- # --- Inference ---
124
- # The history must be a 2D tensor for the model's unsqueeze operation to work
125
- motion_history = torch.empty(0, 16).to(comp_device)
126
-
127
- # --- KEY PARAMETER TO TUNE ---
128
- # The author often uses a higher CFG scale for more dynamic motions.
129
- # 7.0 is a common value in their code; 10.0 is used here.
130
- cfg_scale = 10.0
131
-
132
  print(f"Generating motion for text: '{args.text}' with CFG scale: {cfg_scale}")
133
  with torch.no_grad():
134
- # REVERTED to the correct streaming-native inference function
135
- _, motion_latents = trans_encoder.sample_for_eval_CFG_babel_inference_new_demo(
136
- B_text=args.text,
137
- A_motion=motion_history,
138
- tokenizer='t5-xxl',
139
  clip_model=t5_model,
140
  device=comp_device,
141
- cfg=cfg_scale, # Use the tuned CFG scale
142
- length=240 # Generate a longer sequence for "run"
 
143
  )
144
-
145
  print("Decoding latents to full motion...")
146
  motion_seqs = tae_net.forward_decoder(motion_latents)
147
 
148
  motion = motion_seqs.detach().cpu().numpy()
149
  motion_denormalized = motion * std + mean
150
-
151
- # --- Save Output ---
152
  output_dir = 'demo_output_streamer'
153
- if not os.path.exists(output_dir):
154
- os.makedirs(output_dir)
155
-
156
  output_bvh_path = os.path.join(output_dir, f'{args.text.replace(" ", "_")}_cfg{cfg_scale}.bvh')
157
  save_motion_as_bvh(motion_denormalized, output_bvh_path, fps=30)
 
12
  from sentence_transformers import SentenceTransformer
13
  warnings.filterwarnings('ignore')
14
 
15
+ # --- save_motion_as_bvh: same statements as before, condensed onto fewer lines (docstring removed) ---
16
  def save_motion_as_bvh(motion_data, output_path, fps=30):
 
 
 
17
  print(f"--- Starting direct conversion to BVH: {os.path.basename(output_path)} ---")
18
  try:
19
+ if isinstance(motion_data, torch.Tensor): motion_data = motion_data.detach().cpu().numpy()
20
+ if motion_data.ndim == 3 and motion_data.shape[0] == 1: motion_data = motion_data.squeeze(0)
21
+ elif motion_data.ndim != 2: raise ValueError(f"Input motion data must be 2D, but got shape {motion_data.shape}")
22
+ njoint = 22; nfrm, _ = motion_data.shape
 
 
 
 
 
 
23
  rotations_matrix = rotation_6d_to_matrix(torch.from_numpy(motion_data[:, 8+6*njoint : 8+12*njoint]).reshape(nfrm, -1, 6)).numpy()
 
24
  global_heading_diff_rot_6d = torch.from_numpy(motion_data[:, 2:8])
25
  global_heading_diff_rot = rotation_6d_to_matrix(global_heading_diff_rot_6d).numpy()
26
+ global_heading_rot = np.zeros_like(global_heading_diff_rot); global_heading_rot[0] = global_heading_diff_rot[0]
27
+ for i in range(1, nfrm): global_heading_rot[i] = np.matmul(global_heading_diff_rot[i], global_heading_rot[i-1])
28
+ velocities_root_xy = motion_data[:, :2]; height = motion_data[:, 8 : 8+3*njoint].reshape(nfrm, -1, 3)[:, 0, 1]
29
+ inv_global_heading_rot = np.transpose(global_heading_rot, (0, 2, 1)); rotations_matrix[:, 0, ...] = np.matmul(inv_global_heading_rot, rotations_matrix[:, 0, ...])
30
+ velocities_root_xyz = np.zeros((nfrm, 3)); velocities_root_xyz[:, 0] = velocities_root_xy[:, 0]; velocities_root_xyz[:, 2] = velocities_root_xy[:, 1]
 
 
 
 
 
 
 
 
 
31
  velocities_root_xyz[1:, :] = np.matmul(inv_global_heading_rot[:-1], velocities_root_xyz[1:, :, None]).squeeze(-1)
32
+ root_translation = np.cumsum(velocities_root_xyz, axis=0); root_translation[:, 1] = height
33
+ axis_angle = matrix_to_axis_angle(torch.from_numpy(rotations_matrix)).numpy().reshape(nfrm, -1); poses_24_joints = np.zeros((nfrm, 72)); poses_24_joints[:, :66] = axis_angle
34
+ model = smplx.create(model_path="body_models/human_model_files", model_type="smpl", gender="NEUTRAL"); parents = model.parents.detach().cpu().numpy()
35
+ rest_pose = model().joints.detach().cpu().numpy().squeeze()[:24,:]; offsets = rest_pose - rest_pose[parents]; offsets[0] = np.array([0,0,0])
36
+ rotations_quat = axis_angle_to_quaternion(torch.from_numpy(poses_24_joints.reshape(-1, 24, 3))).numpy(); rotations_euler = np.degrees(quat.to_euler(rotations_quat, order="zyx"))
37
+ positions = np.zeros_like(rotations_quat[..., :3]); positions[:, 0] = root_translation
38
+ joint_names = ["Pelvis", "Left_hip", "Right_hip", "Spine1", "Left_knee", "Right_knee", "Spine2", "Left_ankle", "Right_ankle", "Spine3", "Left_foot", "Right_foot", "Neck", "Left_collar", "Right_collar", "Head", "Left_shoulder", "Right_shoulder", "Left_elbow", "Right_elbow", "Left_wrist", "Right_wrist", "Left_hand", "Right_hand"]
39
+ bvh.save(output_path, {"rotations": rotations_euler, "positions": positions, "offsets": offsets, "parents": parents, "names": joint_names, "order": "zyx", "frametime": 1.0 / fps})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  print(f"✅ BVH file saved successfully to {output_path}")
 
41
  except Exception as e:
42
+ print(f"❌ BVH Conversion Failed. Error: {e}"); import traceback; traceback.print_exc()
 
 
43
 
44
 
45
  if __name__ == '__main__':
 
60
  latent_dim=16, clip_range=[-30, 20]
61
  )
62
  tae_ckpt = torch.load('Causal_TAE_t2m_babel/net_last.pth', map_location='cpu')
63
+ tae_net.load_state_dict(tae_ckpt['net'], strict=True)
 
64
  tae_net.eval()
65
  tae_net.to(comp_device)
66
 
67
  config = LLaMAHFConfig.from_name('Normal_size')
68
  config.block_size = 78
69
  trans_encoder = LLaMAHF(config, args.num_diffusion_head_layers, args.latent_dim, comp_device)
70
+
71
+ # --- FIX: strip the 'module.' prefix from checkpoint keys before loading ---
72
+ print("Loading your trained MotionStreamer checkpoint from 'motionstreamer_model/latest.pth'...")
73
+ # Make sure this path is correct relative to where you run the script
74
+ checkpoint_path = 'motionstreamer_model/latest.pth'
75
+ trans_ckpt = torch.load(checkpoint_path, map_location='cpu')
76
+
77
+ # Create a new state dict without the 'module.' prefix
78
+ unwrapped_state_dict = {}
79
+ for key, value in trans_ckpt['trans'].items():
80
+ if key.startswith('module.'):
81
+ # Strip the 'module.' prefix
82
+ unwrapped_state_dict[key[len('module.'):]] = value
83
+ else:
84
+ # Keep keys that don't have the prefix (just in case)
85
+ unwrapped_state_dict[key] = value
86
+
87
+ # Load the unwrapped state dict
88
+ trans_encoder.load_state_dict(unwrapped_state_dict, strict=True)
89
+ print("Successfully loaded unwrapped checkpoint.")
90
+ # --- END FIX ---
91
+
92
  trans_encoder.eval()
93
  trans_encoder.to(comp_device)
94
 
95
+ # --- Rest of the script: same flow as before, except sampling now uses the two-forward method with a temperature ---
96
  print("Loading mean/std from BABEL dataset...")
97
  mean = np.load('babel_272/t2m_babel_mean_std/Mean.npy')
98
  std = np.load('babel_272/t2m_babel_mean_std/Std.npy')
99
 
100
+ motion_history = torch.empty(0, 16).to(comp_device)
101
+ cfg_scale = 10.0
102
+
 
 
 
 
 
 
103
  print(f"Generating motion for text: '{args.text}' with CFG scale: {cfg_scale}")
104
  with torch.no_grad():
105
+ # Use the new two-forward sampling method to match training
106
+ _, motion_latents = trans_encoder.sample_for_eval_CFG_babel_inference_two_forward(
107
+ B_text=args.text,
108
+ A_motion=motion_history,
109
+ tokenizer='t5-xxl',
110
  clip_model=t5_model,
111
  device=comp_device,
112
+ cfg=cfg_scale,
113
+ length=240,
114
+ temperature=1.3
115
  )
116
+
117
  print("Decoding latents to full motion...")
118
  motion_seqs = tae_net.forward_decoder(motion_latents)
119
 
120
  motion = motion_seqs.detach().cpu().numpy()
121
  motion_denormalized = motion * std + mean
122
+
 
123
  output_dir = 'demo_output_streamer'
124
+ if not os.path.exists(output_dir): os.makedirs(output_dir)
125
+
 
126
  output_bvh_path = os.path.join(output_dir, f'{args.text.replace(" ", "_")}_cfg{cfg_scale}.bvh')
127
  save_motion_as_bvh(motion_denormalized, output_bvh_path, fps=30)