# MotionLCM / app.py — Hugging Face Space by soumyanilain (commit 3edb540, verified)
"""MotionLCM - Real-Time Text-to-Motion Generation
Gradio Interface for Hugging Face Spaces
Author: Soumyanil Ain | MS CS | UNC Charlotte
"""
import os, sys, time, torch, tempfile, subprocess, pickle, glob
import numpy as np
# Put the repo root on sys.path so the local `mld` package resolves.
REPO_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, REPO_DIR)

# Scaffold a minimal HumanML3D dataset layout so the upstream demo can start
# even though no real dataset ships with the Space.
DATASET_DIR = os.path.join(REPO_DIR, "datasets", "humanml3d")
for _sub in ("new_joint_vecs", "texts", "new_joints"):
    os.makedirs(os.path.join(DATASET_DIR, _sub), exist_ok=True)

# One dummy sample "000000": 60 frames of zeroed 263-dim features, zeroed
# 22-joint positions, a placeholder caption, and a test split listing it.
np.save(os.path.join(DATASET_DIR, "new_joint_vecs", "000000.npy"),
        np.zeros((60, 263), dtype=np.float32))
np.save(os.path.join(DATASET_DIR, "new_joints", "000000.npy"),
        np.zeros((60, 22, 3), dtype=np.float32))
with open(os.path.join(DATASET_DIR, "texts", "000000.txt"), "w") as f:
    f.write("a person stands still.\n")
with open(os.path.join(DATASET_DIR, "test.txt"), "w") as f:
    f.write("000000\n")
# Write a wrapper around the upstream demo.py that monkey-patches
# Text2MotionDataset.__init__ so it tolerates the (intentionally) near-empty
# dataset scaffolded above: on the "not enough values to unpack" ValueError it
# installs a single dummy 60-frame sample instead of crashing.  The wrapper is
# a runtime string executed later in a subprocess (see generate()).
WRAPPER = os.path.join(REPO_DIR, "run_demo_patched.py")
with open(WRAPPER, "w") as f:
    f.write('''
import sys, importlib
import numpy as np
import mld.data.humanml.dataset as ds_module
_orig_init = ds_module.Text2MotionDataset.__init__
def _patched_init(self, *args, **kwargs):
    try:
        _orig_init(self, *args, **kwargs)
    except ValueError as e:
        if "not enough values to unpack" in str(e):
            print(f"[PATCH] Empty dataset detected, creating minimal dummy data")
            self.name_list = ["000000"]
            self.length_arr = np.array([60])
            self.data_dict = {"000000": {"motion": np.zeros((60, 263), dtype=np.float32), "length": 60}}
            self.nfeats = 263
            self.max_length = 60
            self.pointer = 0
            self.num_actions = 1
        else:
            raise
ds_module.Text2MotionDataset.__init__ = _patched_init
exec(open("demo.py").read())
''')
# Newer Matplotlib versions reject direct assignment to ax.lines/ax.collections;
# rewrite the vendored plot script to clear them via remove() instead.
PLOT_SCRIPT = os.path.join(REPO_DIR, "mld", "data", "humanml", "utils", "plot_script.py")
if os.path.exists(PLOT_SCRIPT):
    with open(PLOT_SCRIPT, "r") as f:
        src = f.read()
    _fixes = {
        "ax.lines = []": "while ax.lines: ax.lines[0].remove()",
        "ax.collections = []": "while ax.collections: ax.collections[0].remove()",
    }
    patched = src
    for old, new in _fixes.items():
        if old in patched:
            patched = patched.replace(old, new)
    # Rewrite the file only when something actually changed.
    if patched != src:
        with open(PLOT_SCRIPT, "w") as f:
            f.write(patched)
import gradio as gr
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
# 22-joint skeleton split into five kinematic chains (lists of joint indices):
# right leg, left leg, spine+head, right arm, left arm — presumably the
# HumanML3D joint ordering (TODO confirm against the dataset spec).
KINEMATIC_CHAIN = [
    [0,2,5,8,11], [0,1,4,7,10], [0,3,6,9,12,15],
    [9,14,17,19,21], [9,13,16,18,20],
]
# Per-chain plot colors and legend labels; both lists run parallel to
# KINEMATIC_CHAIN (zipped together in the render functions).
COLORS = ["#EF4444","#3B82F6","#10B981","#F97316","#8B5CF6"]
LABELS = ["R Leg","L Leg","Spine","R Arm","L Arm"]
def render_video(joints, text="", fps=20):
    """Render a (frames, joints, 3) joint array as an MP4 skeleton animation.

    Args:
        joints: numpy array of 3D joint positions, shape (F, J, 3); the
            second coordinate is plotted on the vertical axis (labelled "Y").
        text: prompt used in the figure title (truncated to 55 chars).
        fps: playback frame rate.

    Returns:
        Path to a temporary .mp4 file (caller owns cleanup).
    """
    nf = len(joints)
    fig = plt.figure(figsize=(8, 6), dpi=100)
    ax = fig.add_subplot(111, projection="3d")
    ax_x, ax_y, ax_z = joints[:, :, 0], joints[:, :, 1], joints[:, :, 2]
    m = 0.4  # margin around the motion's bounding box

    def update(f):
        ax.cla()
        # Fixed global limits so the camera does not jump between frames.
        ax.set_xlim([ax_x.min() - m, ax_x.max() + m])
        ax.set_ylim([ax_z.min() - m, ax_z.max() + m])
        ax.set_zlim([ax_y.min() - m, ax_y.max() + m])
        title = text[:55] if text else "Generated Motion"
        ax.set_title(f"{title}\nFrame {f+1}/{nf}", fontsize=10)
        ax.set_xlabel("X"); ax.set_ylabel("Z"); ax.set_zlabel("Y")
        for ch, co, la in zip(KINEMATIC_CHAIN, COLORS, LABELS):
            v = [j for j in ch if j < joints.shape[1]]  # guard short skeletons
            ax.plot(joints[f, v, 0], joints[f, v, 2], joints[f, v, 1],
                    color=co, lw=2.5, marker="o", ms=4, label=la if f == 0 else "")
        if f == 0:
            ax.legend(fontsize=7, loc="upper left")
        return []

    anim = FuncAnimation(fig, update, frames=nf, interval=1000 / fps, blit=False)
    # mkstemp instead of NamedTemporaryFile(delete=False): the latter leaked an
    # open handle, and reopening a held-open temp file fails on some platforms.
    fd, out_path = tempfile.mkstemp(suffix=".mp4")
    os.close(fd)
    try:
        anim.save(out_path, writer="ffmpeg", fps=fps)
    finally:
        plt.close(fig)  # always release the figure, even if ffmpeg fails
    return out_path
def render_overlay(joints, text=""):
    """Render the motion as a single "ghost trail" PNG.

    About 10 evenly-spaced snapshots of the skeleton are overlaid with
    increasing opacity and line width, plus the root-joint trajectory as a
    dashed line, on a dark background.

    Args:
        joints: numpy array of 3D joint positions, shape (F, J, 3).
        text: prompt used in the figure title (truncated to 55 chars).

    Returns:
        Path to a temporary .png file (caller owns cleanup).
    """
    nf = len(joints)
    fig = plt.figure(figsize=(10, 7), dpi=120)
    ax = fig.add_subplot(111, projection="3d")
    # Sample ~10 evenly spaced frames, always including the final frame.
    step = max(1, nf // 10)
    frames = list(range(0, nf, step))
    if nf - 1 not in frames:
        frames.append(nf - 1)
    ax_x, ax_y, ax_z = joints[:, :, 0], joints[:, :, 1], joints[:, :, 2]
    m = 0.5  # margin around the motion's bounding box
    ax.set_xlim([ax_x.min() - m, ax_x.max() + m])
    ax.set_ylim([ax_z.min() - m, ax_z.max() + m])
    ax.set_zlim([ax_y.min() - m, ax_y.max() + m])
    ns = max(len(frames) - 1, 1)  # avoid /0 for single-frame motions
    for i, f in enumerate(frames):
        # Later snapshots are drawn more opaque and thicker (ghost effect).
        a = 0.12 + 0.88 * (i / ns); lw = 1 + 2.5 * (i / ns)
        for ch, co in zip(KINEMATIC_CHAIN, COLORS):
            v = [j for j in ch if j < joints.shape[1]]  # guard short skeletons
            ax.plot(joints[f, v, 0], joints[f, v, 2], joints[f, v, 1],
                    color=co, lw=lw, alpha=a, marker="o", ms=2.5 * a)
    r = joints[:, 0, :]  # root joint (index 0) trajectory across all frames
    ax.plot(r[:, 0], r[:, 2], r[:, 1], color="white", lw=1, alpha=0.5, ls="--", label="Root")
    title = text[:55] if text else "Generated Motion"
    ax.set_title(f"{title}\n{nf} frames @ ~20fps", fontsize=11, color="white")
    ax.set_xlabel("X"); ax.set_ylabel("Z"); ax.set_zlabel("Y")
    ax.set_facecolor("#0F172A"); fig.patch.set_facecolor("#0F172A")
    ax.tick_params(colors="#94A3B8")
    ax.xaxis.label.set_color("#94A3B8")
    ax.yaxis.label.set_color("#94A3B8")
    ax.zaxis.label.set_color("#94A3B8")
    ax.legend(fontsize=8, facecolor="#1E293B", labelcolor="white")
    # mkstemp instead of NamedTemporaryFile(delete=False): no leaked open handle.
    fd, out_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        plt.savefig(out_path, dpi=120, bbox_inches="tight", facecolor="#0F172A")
    finally:
        plt.close(fig)  # always release the figure
    return out_path
def generate(prompt, duration, method, seed):
    """Run the MotionLCM/MLD demo in a subprocess and render its output.

    Args:
        prompt: text description of the motion.
        duration: desired clip length in seconds (clamped to 20-300 frames).
        method: radio-button label selecting the config file.
        seed: >= 0 pins PYTHONHASHSEED in the child process; -1 = random.

    Returns:
        (video_path, overlay_image_path, info_text); paths are None on failure.
    """
    if not prompt or not prompt.strip():
        return None, None, "Please enter a text prompt."
    cfg_map = {
        "MotionLCM (Real-time, 1-4 steps)": "motionlcm_t2m.yaml",
        "MLD (Baseline, ~50 steps)": "mld_t2m.yaml",
    }
    cfg_name = cfg_map.get(method, "motionlcm_t2m.yaml")
    fps = 20
    # Clamp to [20, 300] frames (1s-15s at 20 fps).
    nframes = max(20, min(300, int(duration * fps)))
    # The demo reads "<frames> <caption>" from an example file.
    pf = os.path.join(REPO_DIR, "assets", "_gradio_prompt.txt")
    os.makedirs(os.path.dirname(pf), exist_ok=True)
    with open(pf, "w") as f:
        f.write(f"{nframes} {prompt.strip()}")
    env = os.environ.copy()
    if seed >= 0:
        env["PYTHONHASHSEED"] = str(int(seed))
    # Drop stale result pickles so we only pick up this run's output.
    for td in ["experiments_t2m_test", "experiments_control_test"]:
        for p in glob.glob(os.path.join(REPO_DIR, td, "**", "*.pkl"), recursive=True):
            try:
                os.remove(p)
            except OSError:
                pass  # best-effort cleanup; a locked file must not abort the run
    t0 = time.time()
    try:
        result = subprocess.run(
            # sys.executable: run the child with the same interpreter as this
            # app (a bare "python" may resolve to a different environment).
            [sys.executable, "run_demo_patched.py", "--cfg", f"configs/{cfg_name}", "--example", pf],
            cwd=REPO_DIR, capture_output=True, text=True, timeout=600, env=env
        )
    except subprocess.TimeoutExpired:
        # Previously uncaught: a slow run crashed the Gradio callback.
        return None, None, "Generation timed out after 600s. Try a shorter duration."
    elapsed = time.time() - t0
    pkls = []
    for td in ["experiments_t2m_test", "experiments_control_test"]:
        pkls.extend(sorted(glob.glob(os.path.join(REPO_DIR, td, "**", "*.pkl"), recursive=True)))
    if not pkls:
        stderr_tail = result.stderr[-800:] if result.stderr else "No stderr"
        stdout_tail = result.stdout[-800:] if result.stdout else "No stdout"
        return None, None, f"No output generated.\n\nstderr:\n{stderr_tail}\n\nstdout:\n{stdout_tail}"
    # NOTE: pickle.load is only safe here because the file was written by our
    # own subprocess — never use it on untrusted input.
    with open(pkls[-1], "rb") as f:
        data = pickle.load(f)
    if isinstance(data, dict):
        joints = data.get("joints", data.get("motion"))
    elif isinstance(data, (list, tuple)):
        joints = data[0]
    else:
        joints = data
    if joints is None:
        # Guard: a dict missing both keys previously crashed on joints.ndim.
        return None, None, "Output pickle contained no 'joints'/'motion' array."
    if isinstance(joints, torch.Tensor):
        joints = joints.detach().cpu().numpy()
    if joints.ndim == 4:  # batched output -> take the first sample
        joints = joints[0]
    vid = render_video(joints, prompt, fps)
    img = render_overlay(joints, prompt)
    mname = "MotionLCM" if "lcm" in cfg_name else "MLD"
    info = (
        f"Method: {mname}\n"
        f"Prompt: \"{prompt}\"\n"
        f"Frames: {len(joints)} ({len(joints)/fps:.1f}s)\n"
        f"Time: {elapsed:.2f}s\n"
        f"Device: {'cuda' if torch.cuda.is_available() else 'cpu'}"
    )
    return vid, img, info
# Rows for gr.Examples: [prompt, duration_seconds, method_label, seed].
# The method strings must match the `method` radio options exactly; seed -1
# means "random" per the generate() contract.
EXAMPLES = [
    ["a person walks forward and waves", 5.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person jumps up and lands", 3.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person walks in a counterclockwise circle", 8.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person sits down slowly", 4.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person does jumping jacks", 5.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person picks something up from the ground", 4.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person walks backward cautiously", 5.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person kicks with the right leg", 3.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person bows politely", 3.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person stretches their arms above their head", 4.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person jogs in place then stops", 5.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person dances happily", 6.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person throws a ball overhand", 3.0, "MotionLCM (Real-time, 1-4 steps)", -1],
    ["a person climbs stairs", 5.0, "MLD (Baseline, ~50 steps)", 42],
]
# Extra CSS injected into the page: hides the Gradio footer, forces a
# Helvetica font stack, and styles the button with elem_classes="generate-btn".
CUSTOM_CSS = """
footer { display: none !important; }
* { font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif !important; }
.generate-btn {
background: linear-gradient(135deg, #06B6D4, #3B82F6) !important;
border: none !important;
color: white !important;
font-weight: 700 !important;
font-size: 1.05em !important;
border-radius: 10px !important;
transition: all 0.2s ease !important;
padding: 12px !important;
}
.generate-btn:hover {
transform: translateY(-1px) !important;
box-shadow: 0 4px 20px rgba(6,182,212,0.35) !important;
}
"""
# ── UI ──
# Fix: theme= and css= are gr.Blocks() constructor parameters; Blocks.launch()
# does not accept them (previously passed to launch(), so the custom theme and
# CSS were never applied / raise TypeError on current Gradio).
with gr.Blocks(
    title="MotionLCM",
    theme=gr.themes.Soft(primary_hue="cyan", secondary_hue="blue", neutral_hue="slate"),
    css=CUSTOM_CSS,
) as demo:
    # ── Header ──
    gr.HTML("""
<div style="text-align:center; padding:28px 20px 20px; margin-bottom:12px; border-bottom:1px solid #e2e8f0;">
<h1 style="font-size:2.6em; font-weight:800; margin:0 0 4px;
background:linear-gradient(90deg,#06B6D4,#3B82F6);
-webkit-background-clip:text; -webkit-text-fill-color:transparent;">
MotionLCM
</h1>
<p style="color:#64748B; font-size:1em; margin:0 0 16px;">
Real-Time Controllable Motion Generation via Latent Consistency Model
</p>
<span style="background:#06B6D4; color:white; padding:5px 14px; border-radius:50px; font-size:0.82em; font-weight:700; margin:0 3px;">~30ms Inference</span>
<span style="background:#F97316; color:white; padding:5px 14px; border-radius:50px; font-size:0.82em; font-weight:700; margin:0 3px;">1929x Speedup</span>
<span style="background:#E2E8F0; color:#334155; padding:5px 14px; border-radius:50px; font-size:0.82em; font-weight:700; margin:0 3px;">ECCV 2024</span>
<div style="display:flex; justify-content:center; gap:32px; margin-top:18px; flex-wrap:wrap;">
<div style="text-align:center;"><div style="font-size:1.5em; font-weight:800; color:#06B6D4;">22</div><div style="font-size:0.7em; color:#94A3B8; text-transform:uppercase; letter-spacing:0.5px;">Body Joints</div></div>
<div style="text-align:center;"><div style="font-size:1.5em; font-weight:800; color:#06B6D4;">8.4M</div><div style="font-size:0.7em; color:#94A3B8; text-transform:uppercase; letter-spacing:0.5px;">Parameters</div></div>
<div style="text-align:center;"><div style="font-size:1.5em; font-weight:800; color:#06B6D4;">1-4</div><div style="font-size:0.7em; color:#94A3B8; text-transform:uppercase; letter-spacing:0.5px;">Denoising Steps</div></div>
<div style="text-align:center;"><div style="font-size:1.5em; font-weight:800; color:#06B6D4;">20fps</div><div style="font-size:0.7em; color:#94A3B8; text-transform:uppercase; letter-spacing:0.5px;">Output</div></div>
</div>
</div>
""")
    # ── Architecture (no background, just border) ──
    gr.HTML("""
<div style="border:1px solid #e2e8f0; border-radius:10px; padding:14px 20px; margin-bottom:16px;">
<div style="font-size:0.9em; font-weight:700; color:#06B6D4; margin-bottom:8px;">How It Works</div>
<div style="display:flex; align-items:center; justify-content:center; flex-wrap:wrap; gap:6px;">
<div style="border:1px solid #e2e8f0; padding:6px 12px; border-radius:6px; font-size:0.8em; color:#334155; font-weight:600;">Text Prompt</div>
<span style="color:#06B6D4; font-weight:bold;">&rarr;</span>
<div style="border:1px solid #e2e8f0; padding:6px 12px; border-radius:6px; font-size:0.8em; color:#334155; font-weight:600;">Sentence-T5</div>
<span style="color:#06B6D4; font-weight:bold;">&rarr;</span>
<div style="border:2px solid #06B6D4; padding:6px 12px; border-radius:6px; font-size:0.8em; color:#06B6D4; font-weight:700;">MotionLCM</div>
<span style="color:#06B6D4; font-weight:bold;">&rarr;</span>
<div style="border:1px solid #e2e8f0; padding:6px 12px; border-radius:6px; font-size:0.8em; color:#334155; font-weight:600;">VAE Decoder</div>
<span style="color:#06B6D4; font-weight:bold;">&rarr;</span>
<div style="border:1px solid #e2e8f0; padding:6px 12px; border-radius:6px; font-size:0.8em; color:#334155; font-weight:600;">3D Skeleton</div>
</div>
</div>
""")
    # ── Main Interface ──
    with gr.Row():
        with gr.Column(scale=1):
            prompt = gr.Textbox(label="Text Prompt",
                placeholder="Describe a human motion... e.g., 'a person walks forward and waves'", lines=3)
            duration = gr.Slider(1.0, 15.0, 5.0, step=0.5, label="Duration (seconds)")
            seed = gr.Number(-1, label="Seed (-1 = random)", precision=0)
            method = gr.Radio(
                ["MotionLCM (Real-time, 1-4 steps)", "MLD (Baseline, ~50 steps)"],
                value="MotionLCM (Real-time, 1-4 steps)", label="Generation Method")
            btn = gr.Button("Generate Motion", variant="primary", size="lg", elem_classes="generate-btn")
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("Animation"): vid = gr.Video(label="3D Skeleton Animation")
                with gr.Tab("Static Overlay"): img = gr.Image(label="Ghost Overlay View")
            info = gr.Textbox(label="Generation Info", lines=5, interactive=False)
    # ── Examples ──
    gr.Examples(EXAMPLES, [prompt, duration, method, seed], label="Try These Prompts", examples_per_page=7)
    # ── Footer ──
    gr.HTML("""
<div style="margin-top:28px; padding:20px 24px; border-top:1px solid #e2e8f0; text-align:center;">
<div style="margin-bottom:12px;">
<a href="https://arxiv.org/abs/2404.19759" target="_blank"
style="color:#06B6D4; text-decoration:none; font-size:0.88em; font-weight:600; margin:0 10px;">
Research Paper
</a>
<span style="color:#CBD5E1;">&middot;</span>
<a href="https://github.com/Dai-Wenxun/MotionLCM" target="_blank"
style="color:#06B6D4; text-decoration:none; font-size:0.88em; font-weight:600; margin:0 10px;">
Original Code
</a>
</div>
<hr style="width:50px; border:none; border-top:2px solid #e2e8f0; margin:10px auto;">
<p style="color:#94A3B8; font-size:0.78em; margin:4px 0 0;">
&copy; 2026 Soumyanil Ain &middot; MS Computer Science &middot; UNC Charlotte
</p>
<p style="color:#CBD5E1; font-size:0.72em; margin:2px 0 0;">
Based on MotionLCM (ECCV 2024) by Dai et al.
</p>
</div>
""")
    # Wire the button inside the Blocks context so the event is registered.
    btn.click(generate, [prompt, duration, method, seed], [vid, img, info])
demo.queue().launch(ssr_mode=False)