# MotionLCM / app.py — Hugging Face Space by soumyanilain (commit 3edb540, verified)
"""MotionLCM - Real-Time Text-to-Motion Generation
Gradio Interface for Hugging Face Spaces
Author: Soumyanil Ain | MS CS | UNC Charlotte
"""
import os, sys, time, torch, tempfile, subprocess, pickle, glob
import numpy as np
# Put the repo root on sys.path so the local `mld` package resolves.
REPO_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, REPO_DIR)

# Scaffold a minimal HumanML3D dataset layout so the upstream demo can start
# even though no real dataset ships with the Space.
DATASET_DIR = os.path.join(REPO_DIR, "datasets", "humanml3d")
for _sub in ("new_joint_vecs", "texts", "new_joints"):
    os.makedirs(os.path.join(DATASET_DIR, _sub), exist_ok=True)

# One dummy sample "000000": 60 frames of zeroed 263-dim features, zeroed
# 22-joint positions, a placeholder caption, and a test split listing it.
np.save(os.path.join(DATASET_DIR, "new_joint_vecs", "000000.npy"),
        np.zeros((60, 263), dtype=np.float32))
np.save(os.path.join(DATASET_DIR, "new_joints", "000000.npy"),
        np.zeros((60, 22, 3), dtype=np.float32))
with open(os.path.join(DATASET_DIR, "texts", "000000.txt"), "w") as f:
    f.write("a person stands still.\n")
with open(os.path.join(DATASET_DIR, "test.txt"), "w") as f:
    f.write("000000\n")
# Write a wrapper around the upstream demo.py that monkey-patches
# Text2MotionDataset.__init__ so it tolerates the (intentionally) near-empty
# dataset scaffolded above: on the "not enough values to unpack" ValueError it
# installs a single dummy 60-frame sample instead of crashing.  The wrapper is
# a runtime string executed later in a subprocess (see generate()).
WRAPPER = os.path.join(REPO_DIR, "run_demo_patched.py")
with open(WRAPPER, "w") as f:
    f.write('''
import sys, importlib
import numpy as np
import mld.data.humanml.dataset as ds_module
_orig_init = ds_module.Text2MotionDataset.__init__
def _patched_init(self, *args, **kwargs):
    try:
        _orig_init(self, *args, **kwargs)
    except ValueError as e:
        if "not enough values to unpack" in str(e):
            print(f"[PATCH] Empty dataset detected, creating minimal dummy data")
            self.name_list = ["000000"]
            self.length_arr = np.array([60])
            self.data_dict = {"000000": {"motion": np.zeros((60, 263), dtype=np.float32), "length": 60}}
            self.nfeats = 263
            self.max_length = 60
            self.pointer = 0
            self.num_actions = 1
        else:
            raise
ds_module.Text2MotionDataset.__init__ = _patched_init
exec(open("demo.py").read())
''')
# Newer Matplotlib versions reject direct assignment to ax.lines/ax.collections;
# rewrite the vendored plot script to clear them via remove() instead.
PLOT_SCRIPT = os.path.join(REPO_DIR, "mld", "data", "humanml", "utils", "plot_script.py")
if os.path.exists(PLOT_SCRIPT):
    with open(PLOT_SCRIPT, "r") as f:
        src = f.read()
    _fixes = {
        "ax.lines = []": "while ax.lines: ax.lines[0].remove()",
        "ax.collections = []": "while ax.collections: ax.collections[0].remove()",
    }
    patched = src
    for old, new in _fixes.items():
        if old in patched:
            patched = patched.replace(old, new)
    # Rewrite the file only when something actually changed.
    if patched != src:
        with open(PLOT_SCRIPT, "w") as f:
            f.write(patched)
import gradio as gr
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
# 22-joint skeleton split into five kinematic chains (lists of joint indices):
# right leg, left leg, spine+head, right arm, left arm — presumably the
# HumanML3D joint ordering (TODO confirm against the dataset spec).
KINEMATIC_CHAIN = [
    [0,2,5,8,11], [0,1,4,7,10], [0,3,6,9,12,15],
    [9,14,17,19,21], [9,13,16,18,20],
]
# Per-chain plot colors and legend labels; both lists run parallel to
# KINEMATIC_CHAIN (zipped together in the render functions).
COLORS = ["#EF4444","#3B82F6","#10B981","#F97316","#8B5CF6"]
LABELS = ["R Leg","L Leg","Spine","R Arm","L Arm"]
def render_video(joints, text="", fps=20):
    """Render a (frames, joints, 3) joint array as an MP4 skeleton animation.

    Args:
        joints: numpy array of 3D joint positions, shape (F, J, 3); the
            second coordinate is plotted on the vertical axis (labelled "Y").
        text: prompt used in the figure title (truncated to 55 chars).
        fps: playback frame rate.

    Returns:
        Path to a temporary .mp4 file (caller owns cleanup).
    """
    nf = len(joints)
    fig = plt.figure(figsize=(8, 6), dpi=100)
    ax = fig.add_subplot(111, projection="3d")
    ax_x, ax_y, ax_z = joints[:, :, 0], joints[:, :, 1], joints[:, :, 2]
    m = 0.4  # margin around the motion's bounding box

    def update(f):
        ax.cla()
        # Fixed global limits so the camera does not jump between frames.
        ax.set_xlim([ax_x.min() - m, ax_x.max() + m])
        ax.set_ylim([ax_z.min() - m, ax_z.max() + m])
        ax.set_zlim([ax_y.min() - m, ax_y.max() + m])
        title = text[:55] if text else "Generated Motion"
        ax.set_title(f"{title}\nFrame {f+1}/{nf}", fontsize=10)
        ax.set_xlabel("X"); ax.set_ylabel("Z"); ax.set_zlabel("Y")
        for ch, co, la in zip(KINEMATIC_CHAIN, COLORS, LABELS):
            v = [j for j in ch if j < joints.shape[1]]  # guard short skeletons
            ax.plot(joints[f, v, 0], joints[f, v, 2], joints[f, v, 1],
                    color=co, lw=2.5, marker="o", ms=4, label=la if f == 0 else "")
        if f == 0:
            ax.legend(fontsize=7, loc="upper left")
        return []

    anim = FuncAnimation(fig, update, frames=nf, interval=1000 / fps, blit=False)
    # mkstemp instead of NamedTemporaryFile(delete=False): the latter leaked an
    # open handle, and reopening a held-open temp file fails on some platforms.
    fd, out_path = tempfile.mkstemp(suffix=".mp4")
    os.close(fd)
    try:
        anim.save(out_path, writer="ffmpeg", fps=fps)
    finally:
        plt.close(fig)  # always release the figure, even if ffmpeg fails
    return out_path
def render_overlay(joints, text=""):
    """Render the motion as a single "ghost trail" PNG.

    About 10 evenly-spaced snapshots of the skeleton are overlaid with
    increasing opacity and line width, plus the root-joint trajectory as a
    dashed line, on a dark background.

    Args:
        joints: numpy array of 3D joint positions, shape (F, J, 3).
        text: prompt used in the figure title (truncated to 55 chars).

    Returns:
        Path to a temporary .png file (caller owns cleanup).
    """
    nf = len(joints)
    fig = plt.figure(figsize=(10, 7), dpi=120)
    ax = fig.add_subplot(111, projection="3d")
    # Sample ~10 evenly spaced frames, always including the final frame.
    step = max(1, nf // 10)
    frames = list(range(0, nf, step))
    if nf - 1 not in frames:
        frames.append(nf - 1)
    ax_x, ax_y, ax_z = joints[:, :, 0], joints[:, :, 1], joints[:, :, 2]
    m = 0.5  # margin around the motion's bounding box
    ax.set_xlim([ax_x.min() - m, ax_x.max() + m])
    ax.set_ylim([ax_z.min() - m, ax_z.max() + m])
    ax.set_zlim([ax_y.min() - m, ax_y.max() + m])
    ns = max(len(frames) - 1, 1)  # avoid /0 for single-frame motions
    for i, f in enumerate(frames):
        # Later snapshots are drawn more opaque and thicker (ghost effect).
        a = 0.12 + 0.88 * (i / ns); lw = 1 + 2.5 * (i / ns)
        for ch, co in zip(KINEMATIC_CHAIN, COLORS):
            v = [j for j in ch if j < joints.shape[1]]  # guard short skeletons
            ax.plot(joints[f, v, 0], joints[f, v, 2], joints[f, v, 1],
                    color=co, lw=lw, alpha=a, marker="o", ms=2.5 * a)
    r = joints[:, 0, :]  # root joint (index 0) trajectory across all frames
    ax.plot(r[:, 0], r[:, 2], r[:, 1], color="white", lw=1, alpha=0.5, ls="--", label="Root")
    title = text[:55] if text else "Generated Motion"
    ax.set_title(f"{title}\n{nf} frames @ ~20fps", fontsize=11, color="white")
    ax.set_xlabel("X"); ax.set_ylabel("Z"); ax.set_zlabel("Y")
    ax.set_facecolor("#0F172A"); fig.patch.set_facecolor("#0F172A")
    ax.tick_params(colors="#94A3B8")
    ax.xaxis.label.set_color("#94A3B8")
    ax.yaxis.label.set_color("#94A3B8")
    ax.zaxis.label.set_color("#94A3B8")
    ax.legend(fontsize=8, facecolor="#1E293B", labelcolor="white")
    # mkstemp instead of NamedTemporaryFile(delete=False): no leaked open handle.
    fd, out_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        plt.savefig(out_path, dpi=120, bbox_inches="tight", facecolor="#0F172A")
    finally:
        plt.close(fig)  # always release the figure
    return out_path
def generate(prompt, duration, method, seed):
    """Run the MotionLCM/MLD demo in a subprocess and render its output.

    Args:
        prompt: text description of the motion.
        duration: desired clip length in seconds (clamped to 20-300 frames).
        method: radio-button label selecting the config file.
        seed: >= 0 pins PYTHONHASHSEED in the child process; -1 = random.

    Returns:
        (video_path, overlay_image_path, info_text); paths are None on failure.
    """
    if not prompt or not prompt.strip():
        return None, None, "Please enter a text prompt."
    cfg_map = {
        "MotionLCM (Real-time, 1-4 steps)": "motionlcm_t2m.yaml",
        "MLD (Baseline, ~50 steps)": "mld_t2m.yaml",
    }
    cfg_name = cfg_map.get(method, "motionlcm_t2m.yaml")
    fps = 20
    # Clamp to [20, 300] frames (1s-15s at 20 fps).
    nframes = max(20, min(300, int(duration * fps)))
    # The demo reads "<frames> <caption>" from an example file.
    pf = os.path.join(REPO_DIR, "assets", "_gradio_prompt.txt")
    os.makedirs(os.path.dirname(pf), exist_ok=True)
    with open(pf, "w") as f:
        f.write(f"{nframes} {prompt.strip()}")
    env = os.environ.copy()
    if seed >= 0:
        env["PYTHONHASHSEED"] = str(int(seed))
    # Drop stale result pickles so we only pick up this run's output.
    for td in ["experiments_t2m_test", "experiments_control_test"]:
        for p in glob.glob(os.path.join(REPO_DIR, td, "**", "*.pkl"), recursive=True):
            try:
                os.remove(p)
            except OSError:
                pass  # best-effort cleanup; a locked file must not abort the run
    t0 = time.time()
    try:
        result = subprocess.run(
            # sys.executable: run the child with the same interpreter as this
            # app (a bare "python" may resolve to a different environment).
            [sys.executable, "run_demo_patched.py", "--cfg", f"configs/{cfg_name}", "--example", pf],
            cwd=REPO_DIR, capture_output=True, text=True, timeout=600, env=env
        )
    except subprocess.TimeoutExpired:
        # Previously uncaught: a slow run crashed the Gradio callback.
        return None, None, "Generation timed out after 600s. Try a shorter duration."
    elapsed = time.time() - t0
    pkls = []
    for td in ["experiments_t2m_test", "experiments_control_test"]:
        pkls.extend(sorted(glob.glob(os.path.join(REPO_DIR, td, "**", "*.pkl"), recursive=True)))
    if not pkls:
        stderr_tail = result.stderr[-800:] if result.stderr else "No stderr"
        stdout_tail = result.stdout[-800:] if result.stdout else "No stdout"
        return None, None, f"No output generated.\n\nstderr:\n{stderr_tail}\n\nstdout:\n{stdout_tail}"
    # NOTE: pickle.load is only safe here because the file was written by our
    # own subprocess — never use it on untrusted input.
    with open(pkls[-1], "rb") as f:
        data = pickle.load(f)
    if isinstance(data, dict):
        joints = data.get("joints", data.get("motion"))
    elif isinstance(data, (list, tuple)):
        joints = data[0]
    else:
        joints = data
    if joints is None:
        # Guard: a dict missing both keys previously crashed on joints.ndim.
        return None, None, "Output pickle contained no 'joints'/'motion' array."
    if isinstance(joints, torch.Tensor):
        joints = joints.detach().cpu().numpy()
    if joints.ndim == 4:  # batched output -> take the first sample
        joints = joints[0]
    vid = render_video(joints, prompt, fps)
    img = render_overlay(joints, prompt)
    mname = "MotionLCM" if "lcm" in cfg_name else "MLD"
    info = (
        f"Method: {mname}\n"
        f"Prompt: \"{prompt}\"\n"
        f"Frames: {len(joints)} ({len(joints)/fps:.1f}s)\n"
        f"Time: {elapsed:.2f}s\n"
        f"Device: {'cuda' if torch.cuda.is_available() else 'cpu'}"
    )
    return vid, img, info
# Rows for gr.Examples: [prompt, duration_seconds, method_label, seed].
# The method strings must match the `method` radio options exactly; seed -1
# means "random" per the generate() contract.
EXAMPLES = [
    ["a person walks forward and waves", 5.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person jumps up and lands", 3.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person walks in a counterclockwise circle", 8.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person sits down slowly", 4.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person does jumping jacks", 5.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person picks something up from the ground", 4.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person walks backward cautiously", 5.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person kicks with the right leg", 3.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person bows politely", 3.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person stretches their arms above their head", 4.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person jogs in place then stops", 5.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person dances happily", 6.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person throws a ball overhand", 3.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person climbs stairs", 5.0, "MLD (Baseline, ~50 steps)", 42],
]
# Extra CSS injected into the page: hides the Gradio footer, forces a
# Helvetica font stack, and styles the button with elem_classes="generate-btn".
CUSTOM_CSS = """
footer { display: none !important; }
* { font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif !important; }
.generate-btn {
background: linear-gradient(135deg, #06B6D4, #3B82F6) !important;
border: none !important;
color: white !important;
font-weight: 700 !important;
font-size: 1.05em !important;
border-radius: 10px !important;
transition: all 0.2s ease !important;
padding: 12px !important;
}
.generate-btn:hover {
transform: translateY(-1px) !important;
box-shadow: 0 4px 20px rgba(6,182,212,0.35) !important;
}
"""
# ── UI ──
# Fix: theme= and css= are gr.Blocks() constructor parameters; Blocks.launch()
# does not accept them (previously passed to launch(), so the custom theme and
# CSS were never applied / raise TypeError on current Gradio).
with gr.Blocks(
    title="MotionLCM",
    theme=gr.themes.Soft(primary_hue="cyan", secondary_hue="blue", neutral_hue="slate"),
    css=CUSTOM_CSS,
) as demo:
    # ── Header ──
    gr.HTML("""
<div style="text-align:center; padding:28px 20px 20px; margin-bottom:12px; border-bottom:1px solid #e2e8f0;">
<h1 style="font-size:2.6em; font-weight:800; margin:0 0 4px;
background:linear-gradient(90deg,#06B6D4,#3B82F6);
-webkit-background-clip:text; -webkit-text-fill-color:transparent;">
MotionLCM
</h1>
<p style="color:#64748B; font-size:1em; margin:0 0 16px;">
Real-Time Controllable Motion Generation via Latent Consistency Model
</p>
<span style="background:#06B6D4; color:white; padding:5px 14px; border-radius:50px; font-size:0.82em; font-weight:700; margin:0 3px;">~30ms Inference</span>
<span style="background:#F97316; color:white; padding:5px 14px; border-radius:50px; font-size:0.82em; font-weight:700; margin:0 3px;">1929x Speedup</span>
<span style="background:#E2E8F0; color:#334155; padding:5px 14px; border-radius:50px; font-size:0.82em; font-weight:700; margin:0 3px;">ECCV 2024</span>
<div style="display:flex; justify-content:center; gap:32px; margin-top:18px; flex-wrap:wrap;">
<div style="text-align:center;"><div style="font-size:1.5em; font-weight:800; color:#06B6D4;">22</div><div style="font-size:0.7em; color:#94A3B8; text-transform:uppercase; letter-spacing:0.5px;">Body Joints</div></div>
<div style="text-align:center;"><div style="font-size:1.5em; font-weight:800; color:#06B6D4;">8.4M</div><div style="font-size:0.7em; color:#94A3B8; text-transform:uppercase; letter-spacing:0.5px;">Parameters</div></div>
<div style="text-align:center;"><div style="font-size:1.5em; font-weight:800; color:#06B6D4;">1-4</div><div style="font-size:0.7em; color:#94A3B8; text-transform:uppercase; letter-spacing:0.5px;">Denoising Steps</div></div>
<div style="text-align:center;"><div style="font-size:1.5em; font-weight:800; color:#06B6D4;">20fps</div><div style="font-size:0.7em; color:#94A3B8; text-transform:uppercase; letter-spacing:0.5px;">Output</div></div>
</div>
</div>
""")
    # ── Architecture (no background, just border) ──
    gr.HTML("""
<div style="border:1px solid #e2e8f0; border-radius:10px; padding:14px 20px; margin-bottom:16px;">
<div style="font-size:0.9em; font-weight:700; color:#06B6D4; margin-bottom:8px;">How It Works</div>
<div style="display:flex; align-items:center; justify-content:center; flex-wrap:wrap; gap:6px;">
<div style="border:1px solid #e2e8f0; padding:6px 12px; border-radius:6px; font-size:0.8em; color:#334155; font-weight:600;">Text Prompt</div>
<span style="color:#06B6D4; font-weight:bold;">&rarr;</span>
<div style="border:1px solid #e2e8f0; padding:6px 12px; border-radius:6px; font-size:0.8em; color:#334155; font-weight:600;">Sentence-T5</div>
<span style="color:#06B6D4; font-weight:bold;">&rarr;</span>
<div style="border:2px solid #06B6D4; padding:6px 12px; border-radius:6px; font-size:0.8em; color:#06B6D4; font-weight:700;">MotionLCM</div>
<span style="color:#06B6D4; font-weight:bold;">&rarr;</span>
<div style="border:1px solid #e2e8f0; padding:6px 12px; border-radius:6px; font-size:0.8em; color:#334155; font-weight:600;">VAE Decoder</div>
<span style="color:#06B6D4; font-weight:bold;">&rarr;</span>
<div style="border:1px solid #e2e8f0; padding:6px 12px; border-radius:6px; font-size:0.8em; color:#334155; font-weight:600;">3D Skeleton</div>
</div>
</div>
""")
    # ── Main Interface ──
    with gr.Row():
        with gr.Column(scale=1):
            prompt = gr.Textbox(label="Text Prompt",
                placeholder="Describe a human motion... e.g., 'a person walks forward and waves'", lines=3)
            duration = gr.Slider(1.0, 15.0, 5.0, step=0.5, label="Duration (seconds)")
            seed = gr.Number(-1, label="Seed (-1 = random)", precision=0)
            method = gr.Radio(
                ["MotionLCM (Real-time, 1-4 steps)", "MLD (Baseline, ~50 steps)"],
                value="MotionLCM (Real-time, 1-4 steps)", label="Generation Method")
            btn = gr.Button("Generate Motion", variant="primary", size="lg", elem_classes="generate-btn")
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("Animation"): vid = gr.Video(label="3D Skeleton Animation")
                with gr.Tab("Static Overlay"): img = gr.Image(label="Ghost Overlay View")
            info = gr.Textbox(label="Generation Info", lines=5, interactive=False)
    # ── Examples ──
    gr.Examples(EXAMPLES, [prompt, duration, method, seed], label="Try These Prompts", examples_per_page=7)
    # ── Footer ──
    gr.HTML("""
<div style="margin-top:28px; padding:20px 24px; border-top:1px solid #e2e8f0; text-align:center;">
<div style="margin-bottom:12px;">
<a href="https://arxiv.org/abs/2404.19759" target="_blank"
style="color:#06B6D4; text-decoration:none; font-size:0.88em; font-weight:600; margin:0 10px;">
Research Paper
</a>
<span style="color:#CBD5E1;">&middot;</span>
<a href="https://github.com/Dai-Wenxun/MotionLCM" target="_blank"
style="color:#06B6D4; text-decoration:none; font-size:0.88em; font-weight:600; margin:0 10px;">
Original Code
</a>
</div>
<hr style="width:50px; border:none; border-top:2px solid #e2e8f0; margin:10px auto;">
<p style="color:#94A3B8; font-size:0.78em; margin:4px 0 0;">
&copy; 2026 Soumyanil Ain &middot; MS Computer Science &middot; UNC Charlotte
</p>
<p style="color:#CBD5E1; font-size:0.72em; margin:2px 0 0;">
Based on MotionLCM (ECCV 2024) by Dai et al.
</p>
</div>
""")
    # Wire the button inside the Blocks context so the event is registered.
    btn.click(generate, [prompt, duration, method, seed], [vid, img, info])
demo.queue().launch(ssr_mode=False)