# vr-hmr/scripts/demo/render_npz_global.py
# (File uploaded via huggingface_hub; original commit 7e120dd.)
import argparse
from pathlib import Path
from typing import Dict, Tuple
import numpy as np
import torch
from genmo.utils.geo_transform import apply_T_on_points, compute_T_ayfz2ay
from genmo.utils.video_io_utils import get_writer
from genmo.utils.vis.renderer import (
Renderer,
get_global_cameras_static,
get_ground_params_from_points,
)
from third_party.GVHMR.hmr4d.utils.geo.hmr_cam import create_camera_sensor
from third_party.GVHMR.hmr4d.utils.smplx_utils import make_smplx
def _load_motion_npz(npz_path: Path) -> Tuple[np.ndarray, np.ndarray, np.ndarray, float, str]:
with np.load(npz_path, allow_pickle=True) as d:
poses = np.asarray(d["poses"], dtype=np.float32)
trans = np.asarray(d["trans"], dtype=np.float32)
betas = np.asarray(d["betas"], dtype=np.float32).reshape(-1)
fps = float(np.asarray(d.get("mocap_framerate", 30.0)))
gender = str(np.asarray(d.get("gender", "neutral")))
if poses.ndim != 2 or poses.shape[1] < 66:
raise ValueError(f"Expected poses (F,165) or (F,>=66); got {poses.shape}")
if trans.ndim != 2 or trans.shape[1] != 3:
raise ValueError(f"Expected trans (F,3); got {trans.shape}")
if betas.shape[0] < 10:
betas = np.pad(betas, (0, 10 - betas.shape[0]))
betas = betas[:10]
if trans.shape[0] != poses.shape[0]:
raise ValueError(f"poses and trans length mismatch: {poses.shape[0]} vs {trans.shape[0]}")
return poses, trans, betas, fps, gender
def _split_smplx_poses(poses165: torch.Tensor) -> Dict[str, torch.Tensor]:
# SMPL-X pose layout: [global(3), body(63), jaw(3), leye(3), reye(3), lhand(45), rhand(45)] = 165
global_orient = poses165[:, 0:3]
body_pose = poses165[:, 3:66]
extra = poses165[:, 66:]
params = {
"global_orient": global_orient,
"body_pose": body_pose,
}
if extra.shape[1] >= 99:
params.update(
{
"jaw_pose": extra[:, 0:3],
"leye_pose": extra[:, 3:6],
"reye_pose": extra[:, 6:9],
"left_hand_pose": extra[:, 9:54],
"right_hand_pose": extra[:, 54:99],
}
)
return params
def _try_smplx_forward(smplx, params: Dict[str, torch.Tensor]) -> torch.Tensor:
try:
out = smplx(**params)
verts = out.vertices if hasattr(out, "vertices") else out[0].vertices
return verts
except (TypeError, RuntimeError):
# Fallback: model variant doesn't take hand/face params (or expects PCA hand pose dims).
keep = {k: v for k, v in params.items() if k in {"global_orient", "body_pose", "betas", "transl"}}
out = smplx(**keep)
verts = out.vertices if hasattr(out, "vertices") else out[0].vertices
return verts
def main():
    """CLI entry: render a global-camera video of a SMPL-X motion .npz.

    Pipeline: load poses/trans/betas, run the body model, optionally convert
    to SMPL topology, align the motion to the ground plane, set up a static
    global camera, and write the rendered frames to --out.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--npz", required=True, type=str)
    ap.add_argument("--out", required=True, type=str)
    ap.add_argument("--max_frames", type=int, default=300, help="Max rendered frames (uniformly sampled). Use -1 for all.")
    ap.add_argument("--size", type=int, default=512)
    ap.add_argument("--f_mm", type=float, default=24.0)
    ap.add_argument("--crf", type=int, default=23)
    args = ap.parse_args()
    npz_path = Path(args.npz)
    out_path = Path(args.out)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    poses, trans, betas, fps, gender = _load_motion_npz(npz_path)
    total = poses.shape[0]
    # Frame selection: -1 (or any value >= total) keeps every frame; a
    # positive value subsamples indices uniformly across the whole clip.
    if args.max_frames is None or args.max_frames == 0:
        raise ValueError("--max_frames must be -1 or a positive integer")
    if args.max_frames < 0 or args.max_frames >= total:
        idxs = np.arange(total, dtype=np.int64)
    else:
        idxs = np.linspace(0, total - 1, int(args.max_frames), dtype=np.int64)
    # Keep the original FPS even when subsampling frames (avoids tiny fps which can
    # overflow pyav's rational conversion on some builds).
    fps_out = fps
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    smplx = make_smplx("supermotion").to(device).eval()
    poses_t = torch.from_numpy(poses[idxs]).to(device)
    trans_t = torch.from_numpy(trans[idxs]).to(device)
    # One shared shape vector broadcast to every selected frame.
    betas_t = torch.from_numpy(betas[None]).to(device).repeat(len(idxs), 1)
    params = _split_smplx_poses(poses_t)
    params["betas"] = betas_t
    params["transl"] = trans_t
    with torch.inference_mode():
        verts_smplx = _try_smplx_forward(smplx, params)
    # Convert to SMPL topology if possible (better matches the regressor + faster render).
    smplx2smpl_path = Path("third_party/GVHMR/hmr4d/utils/body_model/smplx2smpl_sparse.pt")
    if smplx2smpl_path.exists():
        smplx2smpl = torch.load(smplx2smpl_path, map_location=device)
        # Per-frame matmul: presumably a sparse mapping matrix — batched sparse
        # matmul support varies, so frames are mapped one at a time.
        verts = torch.stack([torch.matmul(smplx2smpl, v) for v in verts_smplx])
        faces = make_smplx("smpl", gender="male").faces
    else:
        verts = verts_smplx
        faces = smplx.faces
    # Align like infer_video.py (ground + face-Z)
    j_reg_path = Path("third_party/GVHMR/inputs/checkpoints/body_models/smpl_neutral_J_regressor.pt")
    J_reg = torch.load(j_reg_path, map_location=device) if j_reg_path.exists() else None
    if J_reg is not None and verts.shape[1] == J_reg.shape[-1]:
        # Regressed root joint (index 0) of the first frame becomes the origin.
        root0 = torch.matmul(J_reg, verts[0])[0]
        offset = root0.clone()
    else:
        # Regressor missing or vertex count mismatch: fall back to the
        # first frame's mean vertex and disable joint-based steps below.
        J_reg = None
        offset = verts[0].mean(0)
    # Drop the subject onto the ground: vertical offset is the global y-minimum.
    offset[1] = verts[..., 1].min()
    verts = verts - offset
    if J_reg is not None:
        # Yaw-align the first frame via the project's ayfz helper
        # (inverse=True maps ay -> ayfz, i.e. makes the subject face +Z).
        joints0 = torch.matmul(J_reg, verts[0])[None]
        T_ay2ayfz = compute_T_ayfz2ay(joints0, inverse=True)
        verts = apply_T_on_points(verts, T_ay2ayfz)
    size = int(args.size)
    _, _, K = create_camera_sensor(size, size, float(args.f_mm))
    renderer = Renderer(size, size, device=device, faces=faces, K=K.to(device), bin_size=0)
    global_R, global_T, global_lights = get_global_cameras_static(
        verts.detach().cpu(),
        beta=2.0,
        cam_height_degree=20,
        target_center_height=1.0,
        device=str(device),
    )
    # Per-frame root trajectory drives the ground-plane extent estimate.
    if J_reg is not None:
        roots = torch.einsum("jv,fvi->fji", J_reg, verts)[..., 0, :]
    else:
        roots = verts.mean(1)
    scale, cx, cz = get_ground_params_from_points(roots.detach().cpu(), verts.detach().cpu())
    renderer.set_ground(scale * 1.5, cx, cz)
    writer = get_writer(str(out_path), fps=float(fps_out), crf=int(args.crf))
    try:
        color = torch.tensor([[0.8, 0.2, 0.8]], device=device)  # purple-ish
        for i in range(verts.shape[0]):
            cameras = renderer.create_camera(global_R[i], global_T[i])
            img = renderer.render_with_ground(verts[[i]], color, cameras, global_lights)
            writer.write_frame(img.astype(np.uint8))
    finally:
        # Always close the writer so the video container is finalized, even on error.
        writer.close()


if __name__ == "__main__":
    main()