| """MotionLCM - Real-Time Text-to-Motion Generation |
| Gradio Interface for Hugging Face Spaces |
| Author: Soumyanil Ain | MS CS | UNC Charlotte |
| """ |
import os
import sys
import time
import glob
import pickle
import subprocess
import tempfile

import numpy as np
import torch

# Make repo-local packages (mld/, configs/, ...) importable no matter what the
# current working directory is when the Space boots.
REPO_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, REPO_DIR)

# The upstream demo expects a HumanML3D dataset layout on disk.  Create a
# minimal stub (a single all-zero motion) so its data loaders can initialize
# without the real dataset being present.
DATASET_DIR = os.path.join(REPO_DIR, "datasets", "humanml3d")
for sub in ("new_joint_vecs", "texts", "new_joints"):
    os.makedirs(os.path.join(DATASET_DIR, sub), exist_ok=True)

# 60 frames of zeros: 263-dim HumanML3D feature vectors and 22-joint xyz
# positions (shapes match what the loaders expect for sample "000000").
np.save(os.path.join(DATASET_DIR, "new_joint_vecs", "000000.npy"),
        np.zeros((60, 263), dtype=np.float32))
np.save(os.path.join(DATASET_DIR, "new_joints", "000000.npy"),
        np.zeros((60, 22, 3), dtype=np.float32))

# Matching caption for the stub sample.
with open(os.path.join(DATASET_DIR, "texts", "000000.txt"), "w") as f:
    f.write("a person stands still.\n")

# Test split listing just the stub sample id.
with open(os.path.join(DATASET_DIR, "test.txt"), "w") as f:
    f.write("000000\n")
|
# Write a small launcher script that monkey-patches the HumanML3D dataset
# class so the upstream demo.py can start even when only the stub dataset
# created above exists (its __init__ otherwise fails while unpacking an
# empty split).
WRAPPER = os.path.join(REPO_DIR, "run_demo_patched.py")
_WRAPPER_SRC = '''
import sys, importlib
import numpy as np
import mld.data.humanml.dataset as ds_module

_orig_init = ds_module.Text2MotionDataset.__init__

def _patched_init(self, *args, **kwargs):
    try:
        _orig_init(self, *args, **kwargs)
    except ValueError as e:
        if "not enough values to unpack" in str(e):
            print("[PATCH] Empty dataset detected, creating minimal dummy data")
            self.name_list = ["000000"]
            self.length_arr = np.array([60])
            self.data_dict = {"000000": {"motion": np.zeros((60, 263), dtype=np.float32), "length": 60}}
            self.nfeats = 263
            self.max_length = 60
            self.pointer = 0
            self.num_actions = 1
        else:
            raise

ds_module.Text2MotionDataset.__init__ = _patched_init

# Run the real demo under this patched environment.  Read via a context
# manager (no leaked handle) and compile with a filename so tracebacks
# point at demo.py instead of "<string>".
with open("demo.py") as fp:
    _demo_src = fp.read()
exec(compile(_demo_src, "demo.py", "exec"))
'''
with open(WRAPPER, "w") as f:
    f.write(_WRAPPER_SRC)
|
|
# The vendored plot_script.py clears artists with "ax.lines = []" /
# "ax.collections = []", which newer matplotlib forbids (those attributes are
# read-only).  Rewrite the file in place with the supported remove() idiom.
PLOT_SCRIPT = os.path.join(REPO_DIR, "mld", "data", "humanml", "utils", "plot_script.py")
if os.path.exists(PLOT_SCRIPT):
    with open(PLOT_SCRIPT, "r") as fh:
        original = fh.read()
    patched = original
    for legacy, replacement in (
        ("ax.lines = []", "while ax.lines: ax.lines[0].remove()"),
        ("ax.collections = []", "while ax.collections: ax.collections[0].remove()"),
    ):
        patched = patched.replace(legacy, replacement)
    # str.replace only alters the text when a legacy snippet was present, so
    # comparing the strings is equivalent to the original "changed" flag.
    if patched != original:
        with open(PLOT_SCRIPT, "w") as fh:
            fh.write(patched)
|
|
import gradio as gr
import matplotlib
# Select the headless Agg backend BEFORE importing pyplot — required on a
# server with no display.
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation


# Bone chains as joint-index paths for the 22-joint skeleton produced by the
# model (presumably the HumanML3D/SMPL joint ordering — TODO confirm).
# Order matches COLORS and LABELS below.
KINEMATIC_CHAIN = [
    [0,2,5,8,11], [0,1,4,7,10], [0,3,6,9,12,15],
    [9,14,17,19,21], [9,13,16,18,20],
]
# One color per chain: red, blue, green, orange, purple.
COLORS = ["#EF4444","#3B82F6","#10B981","#F97316","#8B5CF6"]
# Human-readable names for the legend, aligned with KINEMATIC_CHAIN.
LABELS = ["R Leg","L Leg","Spine","R Arm","L Arm"]
|
|
|
|
def render_video(joints, text="", fps=20):
    """Render a (frames, joints, 3) array as an MP4 stick-figure animation.

    Args:
        joints: array-like of per-frame 3D joint positions, indexed
            [frame, joint, xyz].  Data is Y-up; axes are swapped when
            plotting so Z runs along matplotlib's Y axis.
        text: prompt used for the title (truncated to 55 chars).
        fps: playback frame rate.

    Returns:
        Path to a temporary .mp4 file (caller owns/deletes it).

    Note: requires an ffmpeg binary on PATH for the matplotlib writer.
    """
    nf = len(joints)
    fig = plt.figure(figsize=(8, 6), dpi=100)
    ax = fig.add_subplot(111, projection="3d")
    xs, ys, zs = joints[:, :, 0], joints[:, :, 1], joints[:, :, 2]
    m = 0.4  # axis margin around the motion extents
    # Loop-invariant: compute the title once instead of per frame.
    title = text[:55] if text else "Generated Motion"

    def update(f):
        ax.cla()
        # Limits must be re-applied after cla(); fixed over the whole clip
        # so the camera does not jitter between frames.
        ax.set_xlim([xs.min() - m, xs.max() + m])
        ax.set_ylim([zs.min() - m, zs.max() + m])
        ax.set_zlim([ys.min() - m, ys.max() + m])
        ax.set_title(f"{title}\nFrame {f+1}/{nf}", fontsize=10)
        ax.set_xlabel("X"); ax.set_ylabel("Z"); ax.set_zlabel("Y")
        for ch, co, la in zip(KINEMATIC_CHAIN, COLORS, LABELS):
            # Guard against skeletons with fewer joints than the chains assume.
            v = [j for j in ch if j < joints.shape[1]]
            ax.plot(joints[f, v, 0], joints[f, v, 2], joints[f, v, 1],
                    color=co, lw=2.5, marker="o", ms=4, label=la if f == 0 else "")
        if f == 0:
            ax.legend(fontsize=7, loc="upper left")
        return []

    anim = FuncAnimation(fig, update, frames=nf, interval=1000 / fps, blit=False)
    tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    tmp.close()  # close the handle immediately; anim.save reopens by name
    anim.save(tmp.name, writer="ffmpeg", fps=fps)
    plt.close(fig)
    return tmp.name
|
|
|
|
def render_overlay(joints, text=""):
    """Render a "ghost trail" PNG: ~10 evenly spaced frames overlaid in one
    3D plot, later frames drawn thicker and more opaque, plus the root-joint
    trajectory as a dashed line.

    Args:
        joints: array-like of per-frame 3D joint positions [frame, joint, xyz].
        text: prompt used for the title (truncated to 55 chars).

    Returns:
        Path to a temporary .png file (caller owns/deletes it).
    """
    nf = len(joints)
    fig = plt.figure(figsize=(10, 7), dpi=120)
    ax = fig.add_subplot(111, projection="3d")
    # Roughly 10 snapshots across the clip; always include the final frame.
    step = max(1, nf // 10)
    frames = list(range(0, nf, step))
    if nf - 1 not in frames:
        frames.append(nf - 1)
    xs, ys, zs = joints[:, :, 0], joints[:, :, 1], joints[:, :, 2]
    m = 0.5  # axis margin around the motion extents
    ax.set_xlim([xs.min() - m, xs.max() + m])
    ax.set_ylim([zs.min() - m, zs.max() + m])  # Y-up data: Z on plot's Y axis
    ax.set_zlim([ys.min() - m, ys.max() + m])
    ns = max(len(frames) - 1, 1)
    for i, f in enumerate(frames):
        # Fade-in: alpha 0.12→1.0 and line width 1→3.5 from first to last.
        a = 0.12 + 0.88 * (i / ns)
        lw = 1 + 2.5 * (i / ns)
        for ch, co in zip(KINEMATIC_CHAIN, COLORS):
            v = [j for j in ch if j < joints.shape[1]]
            ax.plot(joints[f, v, 0], joints[f, v, 2], joints[f, v, 1],
                    color=co, lw=lw, alpha=a, marker="o", ms=2.5 * a)
    # Root-joint (pelvis) path across the whole clip.
    r = joints[:, 0, :]
    ax.plot(r[:, 0], r[:, 2], r[:, 1], color="white", lw=1, alpha=0.5, ls="--", label="Root")
    title = text[:55] if text else "Generated Motion"
    ax.set_title(f"{title}\n{nf} frames @ ~20fps", fontsize=11, color="white")
    ax.set_xlabel("X"); ax.set_ylabel("Z"); ax.set_zlabel("Y")
    # Dark slate styling to match the app theme.
    ax.set_facecolor("#0F172A"); fig.patch.set_facecolor("#0F172A")
    ax.tick_params(colors="#94A3B8")
    ax.xaxis.label.set_color("#94A3B8")
    ax.yaxis.label.set_color("#94A3B8")
    ax.zaxis.label.set_color("#94A3B8")
    ax.legend(fontsize=8, facecolor="#1E293B", labelcolor="white")
    tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
    tmp.close()  # close the handle immediately; savefig reopens by name
    plt.savefig(tmp.name, dpi=120, bbox_inches="tight", facecolor="#0F172A")
    plt.close(fig)
    return tmp.name
|
|
|
|
def generate(prompt, duration, method, seed):
    """Gradio handler: run the demo subprocess and render its output.

    Args:
        prompt: motion description text.
        duration: requested clip length in seconds (clamped to 1-15s @ 20fps).
        method: UI label selecting the MotionLCM or MLD config.
        seed: integer seed, -1/None meaning "random".

    Returns:
        (video_path, image_path, info_text); the paths are None on failure
        and info_text then carries the error details.
    """
    if not prompt or not prompt.strip():
        return None, None, "Please enter a text prompt."
    cfg_map = {
        "MotionLCM (Real-time, 1-4 steps)": "motionlcm_t2m.yaml",
        "MLD (Baseline, ~50 steps)": "mld_t2m.yaml",
    }
    cfg_name = cfg_map.get(method, "motionlcm_t2m.yaml")
    fps = 20
    # Clamp to 1-15 s worth of frames (20-300) regardless of slider abuse.
    nframes = max(20, min(300, int(duration * fps)))
    # The demo reads "<nframes> <prompt>" from an example file.
    pf = os.path.join(REPO_DIR, "assets", "_gradio_prompt.txt")
    os.makedirs(os.path.dirname(pf), exist_ok=True)
    with open(pf, "w") as f:
        f.write(f"{nframes} {prompt.strip()}")
    env = os.environ.copy()
    # gr.Number can yield None when cleared; treat that like "random".
    # NOTE(review): PYTHONHASHSEED only fixes hash randomization in the child
    # process — it does not seed torch/numpy; confirm demo.py reads a seed.
    if seed is not None and seed >= 0:
        env["PYTHONHASHSEED"] = str(int(seed))
    # Remove stale outputs so the pkl we pick up below is from this run.
    for td in ("experiments_t2m_test", "experiments_control_test"):
        for p in glob.glob(os.path.join(REPO_DIR, td, "**", "*.pkl"), recursive=True):
            try:
                os.remove(p)
            except OSError:
                pass  # best-effort cleanup; a leftover file is not fatal
    t0 = time.time()
    try:
        # sys.executable guarantees the same interpreter/venv as this app.
        result = subprocess.run(
            [sys.executable, "run_demo_patched.py", "--cfg", f"configs/{cfg_name}", "--example", pf],
            cwd=REPO_DIR, capture_output=True, text=True, timeout=600, env=env
        )
    except subprocess.TimeoutExpired:
        return None, None, "Generation timed out after 600 seconds. Try a shorter duration."
    elapsed = time.time() - t0
    pkls = []
    for td in ("experiments_t2m_test", "experiments_control_test"):
        pkls.extend(sorted(glob.glob(os.path.join(REPO_DIR, td, "**", "*.pkl"), recursive=True)))
    if not pkls:
        stderr_tail = result.stderr[-800:] if result.stderr else "No stderr"
        stdout_tail = result.stdout[-800:] if result.stdout else "No stdout"
        return None, None, f"No output generated.\n\nstderr:\n{stderr_tail}\n\nstdout:\n{stdout_tail}"
    # Latest pkl is the freshest run; output layout varies between configs.
    with open(pkls[-1], "rb") as f:
        data = pickle.load(f)
    if isinstance(data, dict):
        joints = data.get("joints", data.get("motion"))
    elif isinstance(data, (list, tuple)):
        joints = data[0]
    else:
        joints = data
    if joints is None:
        return None, None, "Output file did not contain joint data."
    if isinstance(joints, torch.Tensor):
        joints = joints.detach().cpu().numpy()
    if joints.ndim == 4:  # batched output: take the first sample
        joints = joints[0]
    vid = render_video(joints, prompt, fps)
    img = render_overlay(joints, prompt)
    mname = "MotionLCM" if "lcm" in cfg_name else "MLD"
    info = (
        f"Method: {mname}\n"
        f"Prompt: \"{prompt}\"\n"
        f"Frames: {len(joints)} ({len(joints)/fps:.1f}s)\n"
        f"Time: {elapsed:.2f}s\n"
        f"Device: {'cuda' if torch.cuda.is_available() else 'cpu'}"
    )
    return vid, img, info
|
|
|
|
# Curated example prompts shown in the gr.Examples gallery:
# [prompt, duration_seconds, method_label, seed].
_LCM = "MotionLCM (Real-time, 1-4 steps)"
_MLD = "MLD (Baseline, ~50 steps)"
EXAMPLES = [
    ["a person walks forward and waves", 5.0, _LCM, -1],
    ["a person jumps up and lands", 3.0, _LCM, -1],
    ["a person walks in a counterclockwise circle", 8.0, _LCM, -1],
    ["a person sits down slowly", 4.0, _LCM, -1],
    ["a person does jumping jacks", 5.0, _LCM, -1],
    ["a person picks something up from the ground", 4.0, _LCM, -1],
    ["a person walks backward cautiously", 5.0, _LCM, -1],
    ["a person kicks with the right leg", 3.0, _LCM, -1],
    ["a person bows politely", 3.0, _LCM, -1],
    ["a person stretches their arms above their head", 4.0, _LCM, -1],
    ["a person jogs in place then stops", 5.0, _LCM, -1],
    ["a person dances happily", 6.0, _LCM, -1],
    ["a person throws a ball overhand", 3.0, _LCM, -1],
    ["a person climbs stairs", 5.0, _MLD, 42],
]
|
|
# Extra CSS injected into the Gradio page: hides the default footer, forces a
# Helvetica font stack, and styles the primary generate button (cyan-blue
# gradient with a hover lift + glow).
CUSTOM_CSS = """
footer { display: none !important; }
* { font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif !important; }
.generate-btn {
    background: linear-gradient(135deg, #06B6D4, #3B82F6) !important;
    border: none !important;
    color: white !important;
    font-weight: 700 !important;
    font-size: 1.05em !important;
    border-radius: 10px !important;
    transition: all 0.2s ease !important;
    padding: 12px !important;
}
.generate-btn:hover {
    transform: translateY(-1px) !important;
    box-shadow: 0 4px 20px rgba(6,182,212,0.35) !important;
}
"""
|
|
# Build the UI.  theme/css are gr.Blocks() constructor arguments (per the
# Gradio Blocks API) — passing them to launch() has no effect, so they are
# applied here where they actually take hold.
with gr.Blocks(
    title="MotionLCM",
    theme=gr.themes.Soft(primary_hue="cyan", secondary_hue="blue", neutral_hue="slate"),
    css=CUSTOM_CSS,
) as demo:

    # --- Header: title, badges, headline stats ---
    gr.HTML("""
    <div style="text-align:center; padding:28px 20px 20px; margin-bottom:12px; border-bottom:1px solid #e2e8f0;">
      <h1 style="font-size:2.6em; font-weight:800; margin:0 0 4px;
                 background:linear-gradient(90deg,#06B6D4,#3B82F6);
                 -webkit-background-clip:text; -webkit-text-fill-color:transparent;">
        MotionLCM
      </h1>
      <p style="color:#64748B; font-size:1em; margin:0 0 16px;">
        Real-Time Controllable Motion Generation via Latent Consistency Model
      </p>
      <span style="background:#06B6D4; color:white; padding:5px 14px; border-radius:50px; font-size:0.82em; font-weight:700; margin:0 3px;">~30ms Inference</span>
      <span style="background:#F97316; color:white; padding:5px 14px; border-radius:50px; font-size:0.82em; font-weight:700; margin:0 3px;">1929x Speedup</span>
      <span style="background:#E2E8F0; color:#334155; padding:5px 14px; border-radius:50px; font-size:0.82em; font-weight:700; margin:0 3px;">ECCV 2024</span>
      <div style="display:flex; justify-content:center; gap:32px; margin-top:18px; flex-wrap:wrap;">
        <div style="text-align:center;"><div style="font-size:1.5em; font-weight:800; color:#06B6D4;">22</div><div style="font-size:0.7em; color:#94A3B8; text-transform:uppercase; letter-spacing:0.5px;">Body Joints</div></div>
        <div style="text-align:center;"><div style="font-size:1.5em; font-weight:800; color:#06B6D4;">8.4M</div><div style="font-size:0.7em; color:#94A3B8; text-transform:uppercase; letter-spacing:0.5px;">Parameters</div></div>
        <div style="text-align:center;"><div style="font-size:1.5em; font-weight:800; color:#06B6D4;">1-4</div><div style="font-size:0.7em; color:#94A3B8; text-transform:uppercase; letter-spacing:0.5px;">Denoising Steps</div></div>
        <div style="text-align:center;"><div style="font-size:1.5em; font-weight:800; color:#06B6D4;">20fps</div><div style="font-size:0.7em; color:#94A3B8; text-transform:uppercase; letter-spacing:0.5px;">Output</div></div>
      </div>
    </div>
    """)

    # --- Pipeline diagram ---
    gr.HTML("""
    <div style="border:1px solid #e2e8f0; border-radius:10px; padding:14px 20px; margin-bottom:16px;">
      <div style="font-size:0.9em; font-weight:700; color:#06B6D4; margin-bottom:8px;">How It Works</div>
      <div style="display:flex; align-items:center; justify-content:center; flex-wrap:wrap; gap:6px;">
        <div style="border:1px solid #e2e8f0; padding:6px 12px; border-radius:6px; font-size:0.8em; color:#334155; font-weight:600;">Text Prompt</div>
        <span style="color:#06B6D4; font-weight:bold;">&rarr;</span>
        <div style="border:1px solid #e2e8f0; padding:6px 12px; border-radius:6px; font-size:0.8em; color:#334155; font-weight:600;">Sentence-T5</div>
        <span style="color:#06B6D4; font-weight:bold;">&rarr;</span>
        <div style="border:2px solid #06B6D4; padding:6px 12px; border-radius:6px; font-size:0.8em; color:#06B6D4; font-weight:700;">MotionLCM</div>
        <span style="color:#06B6D4; font-weight:bold;">&rarr;</span>
        <div style="border:1px solid #e2e8f0; padding:6px 12px; border-radius:6px; font-size:0.8em; color:#334155; font-weight:600;">VAE Decoder</div>
        <span style="color:#06B6D4; font-weight:bold;">&rarr;</span>
        <div style="border:1px solid #e2e8f0; padding:6px 12px; border-radius:6px; font-size:0.8em; color:#334155; font-weight:600;">3D Skeleton</div>
      </div>
    </div>
    """)

    # --- Controls (left) and results (right) ---
    with gr.Row():
        with gr.Column(scale=1):
            prompt = gr.Textbox(label="Text Prompt",
                placeholder="Describe a human motion... e.g., 'a person walks forward and waves'", lines=3)
            duration = gr.Slider(1.0, 15.0, 5.0, step=0.5, label="Duration (seconds)")
            seed = gr.Number(-1, label="Seed (-1 = random)", precision=0)
            method = gr.Radio(
                ["MotionLCM (Real-time, 1-4 steps)", "MLD (Baseline, ~50 steps)"],
                value="MotionLCM (Real-time, 1-4 steps)", label="Generation Method")
            btn = gr.Button("Generate Motion", variant="primary", size="lg", elem_classes="generate-btn")
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("Animation"): vid = gr.Video(label="3D Skeleton Animation")
                with gr.Tab("Static Overlay"): img = gr.Image(label="Ghost Overlay View")
            info = gr.Textbox(label="Generation Info", lines=5, interactive=False)

    # --- Example prompt gallery ---
    gr.Examples(EXAMPLES, [prompt, duration, method, seed], label="Try These Prompts", examples_per_page=7)

    # --- Footer: links and attribution ---
    gr.HTML("""
    <div style="margin-top:28px; padding:20px 24px; border-top:1px solid #e2e8f0; text-align:center;">
      <div style="margin-bottom:12px;">
        <a href="https://arxiv.org/abs/2404.19759" target="_blank"
           style="color:#06B6D4; text-decoration:none; font-size:0.88em; font-weight:600; margin:0 10px;">
          Research Paper
        </a>
        <span style="color:#CBD5E1;">&middot;</span>
        <a href="https://github.com/Dai-Wenxun/MotionLCM" target="_blank"
           style="color:#06B6D4; text-decoration:none; font-size:0.88em; font-weight:600; margin:0 10px;">
          Original Code
        </a>
      </div>
      <hr style="width:50px; border:none; border-top:2px solid #e2e8f0; margin:10px auto;">
      <p style="color:#94A3B8; font-size:0.78em; margin:4px 0 0;">
        &copy; 2026 Soumyanil Ain &middot; MS Computer Science &middot; UNC Charlotte
      </p>
      <p style="color:#CBD5E1; font-size:0.72em; margin:2px 0 0;">
        Based on MotionLCM (ECCV 2024) by Dai et al.
      </p>
    </div>
    """)

    # Wire the button to the generation handler.
    btn.click(generate, [prompt, duration, method, seed], [vid, img, info])
|
|
# Blocks.launch() does not accept theme/css keyword arguments (those belong
# on the gr.Blocks() constructor), so only launch options are passed here.
# queue() serializes requests — generation spawns a heavyweight subprocess.
demo.queue().launch(ssr_mode=False)