rdz-falcon committed on
Commit
1ea37cf
·
verified ·
1 Parent(s): 2d54a11

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +327 -301
app.py CHANGED
@@ -1,349 +1,375 @@
1
- import gradio as gr
2
- import torch
 
3
  import os
4
  import sys
5
- import warnings
6
  import re
7
  import json
8
  import random
 
 
 
9
  from pathlib import Path
10
 
11
- # Add root to path to allow imports from project root when running from demo-code/
12
- # or when running from root
13
- current_dir = os.path.dirname(os.path.abspath(__file__))
14
- parent_dir = os.path.dirname(current_dir)
15
- sys.path.append(current_dir)
16
- sys.path.append(parent_dir)
17
 
18
- # Import project modules
19
- try:
20
- from visualize import visualize
21
- # Try importing what we can, but we will implement generation logic directly here
22
- # to match test_overfit.py / metrics.py exactly and avoid dependency issues.
23
- # We catch Exception because unsloth in model.py might raise NotImplementedError on CPU
24
- from model import get_motion_token_info
25
- except Exception as e:
26
- print(f"Error importing project modules: {e}")
27
- print("Make sure you are running this from the project root or have the project structure intact.")
28
- # Fallback for explicit relative imports if needed in some environments
29
- try:
30
- from visualize import visualize
31
- except Exception as vis_e:
32
- print(f"Visualize import failed too: {vis_e}")
33
 
34
- # Constants
35
- HF_REPO_ID = "rdz-falcon/SignMotionGPTfit-archive"
36
- EPOCH_SUBFOLDER = "stage2_v2/epoch-030"
37
- CODEBOOK_SIZE = 512
38
- DATASET_PATH = os.environ.get("DATASET_PATH", "enriched_dataset.json")
39
 
40
- # Hardcoded Config from test_overfit.py / config.py
 
 
 
 
 
 
 
 
 
 
 
 
41
  INFERENCE_TEMPERATURE = 0.7
42
  INFERENCE_TOP_K = 50
43
  INFERENCE_REPETITION_PENALTY = 1.2
44
- M_START = "<M_START>"
45
- M_END = "<M_END>"
46
 
47
- # Global model cache
48
- MODEL = None
49
- TOKENIZER = None
50
- # We use M_START/M_END as in test_overfit.py
51
- M_START_ID = None
52
- M_END_ID = None
53
- VARIANT_MAP = {}
54
 
55
- def load_variant_map():
56
- """Load dataset to map words to valid participant IDs."""
57
- global VARIANT_MAP
58
-
59
- # Try multiple possible paths for the dataset
60
- candidates = [
61
- DATASET_PATH,
62
- os.path.join(os.path.dirname(__file__), DATASET_PATH),
63
- os.path.join(os.path.dirname(__file__), "..", DATASET_PATH),
64
- "data/motion_llm_dataset.json", # Fallback to raw dataset if enriched missing
65
- "motion_llm_dataset.json"
66
- ]
67
-
68
- found_path = None
69
- for p in candidates:
70
- if os.path.exists(p):
71
- found_path = p
72
- break
73
-
74
- if found_path:
75
- print(f"Loading variants from {found_path}...")
76
- try:
77
- with open(found_path, 'r', encoding='utf-8') as f:
78
- data = json.load(f)
79
-
80
- mapping = {}
81
- count = 0
82
- for entry in data:
83
- # Support both formats (enriched or raw)
84
- word = entry.get("word") or entry.get("text_query")
85
- if not word: continue
86
-
87
- # Clean word (sometimes text_query is "Motion for word 'hello'")
88
- if "motion for word" in word.lower():
89
- # extraction heuristic if needed, but 'word' field is preferred
90
- pass
91
-
92
- word = word.lower().strip()
93
- pid = entry.get("participant_id")
94
-
95
- if word and pid:
96
- if word not in mapping:
97
- mapping[word] = []
98
- if pid not in mapping[word]:
99
- mapping[word].append(str(pid))
100
- count += 1
101
-
102
- VARIANT_MAP = mapping
103
- print(f"Loaded {count} variants for {len(VARIANT_MAP)} words.")
104
-
105
- # Debug check for 'push'
106
- if 'push' in VARIANT_MAP:
107
- print(f" 'push' variants: {VARIANT_MAP['push']}")
108
- else:
109
- print(" 'push' NOT found in dataset.")
110
-
111
- except Exception as e:
112
- print(f"Error loading dataset: {e}")
113
- else:
114
- print(f"⚠️ Dataset not found. Tried: {candidates}. Variants will default to 'unknown'.")
115
-
116
- # Hardcoded fallback for demonstration words if missing from dataset
117
- defaults = {
118
- "push": ["P40", "P123", "P1"],
119
- "send": ["P40", "P123"],
120
- "library": ["P40"],
121
- "passport": ["P40"]
122
- }
123
- for w, pids in defaults.items():
124
- if w not in VARIANT_MAP:
125
- VARIANT_MAP[w] = pids
126
- print(f" Added fallback variants for '{w}': {pids}")
127
 
128
- def load_model_from_hf(repo_id, subfolder, token=None):
129
- from transformers import AutoModelForCausalLM, AutoTokenizer
130
- print(f"Loading model from HF: {repo_id}/{subfolder}")
 
 
 
 
131
  try:
132
- tokenizer = AutoTokenizer.from_pretrained(repo_id, subfolder=subfolder, token=token, trust_remote_code=True)
133
- model = AutoModelForCausalLM.from_pretrained(repo_id, subfolder=subfolder, token=token, trust_remote_code=True)
134
- return model, tokenizer
135
- except Exception as e:
136
- print(f"Error loading model: {e}")
137
- return None, None
 
 
 
 
 
 
 
 
 
 
 
138
 
139
- def init_model():
140
- global MODEL, TOKENIZER, M_START_ID, M_END_ID
141
- if MODEL is not None:
 
 
 
 
 
 
 
 
142
  return
143
 
144
- load_variant_map()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
- token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
147
-
148
- # Load model/tokenizer
149
- MODEL, TOKENIZER = load_model_from_hf(HF_REPO_ID, EPOCH_SUBFOLDER, token)
150
-
151
- if MODEL is None:
152
- raise RuntimeError(f"Failed to load model from {HF_REPO_ID}/{EPOCH_SUBFOLDER}")
153
 
154
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
155
- MODEL.to(device)
156
- MODEL.eval()
 
 
 
 
 
157
 
158
- # Setup special tokens matching test_overfit.py
159
- # test_overfit.py uses M_START="<M_START>" and M_END="<M_END>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
- # Check if tokens exist
162
- if M_START not in TOKENIZER.get_vocab() or M_END not in TOKENIZER.get_vocab():
163
- print(f"⚠️ Warning: {M_START} or {M_END} not found in tokenizer. Adding them now...")
164
- num_added = TOKENIZER.add_special_tokens({"additional_special_tokens": [M_START, M_END]})
165
- if num_added > 0:
166
- MODEL.resize_token_embeddings(len(TOKENIZER))
167
- print(f" Added {num_added} special tokens.")
168
 
169
- M_START_ID = TOKENIZER.convert_tokens_to_ids(M_START)
170
- M_END_ID = TOKENIZER.convert_tokens_to_ids(M_END)
 
 
171
 
172
- # Check motion tokens
173
- # We expect <motion_0> ... <motion_511>
174
- # If missing, add them
175
- first_motion = "<motion_0>"
176
- if first_motion not in TOKENIZER.get_vocab():
177
- print("⚠️ Warning: Motion tokens not found. Adding them now...")
178
- motion_tokens = [f"<motion_{i}>" for i in range(CODEBOOK_SIZE)]
179
- num_added = TOKENIZER.add_tokens(motion_tokens, special_tokens=True)
180
- if num_added > 0:
181
- MODEL.resize_token_embeddings(len(TOKENIZER))
182
- print(f" Added {num_added} motion tokens.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
- print(f"Model initialized. Vocab size: {len(TOKENIZER)}")
185
- print(f"M_START_ID: {M_START_ID}, M_END_ID: {M_END_ID}")
 
 
 
 
 
 
 
 
 
 
186
 
187
- def generate_motion_simple(model, tokenizer, prompt_text, device):
188
- """
189
- Replicates the simple generation logic from metrics.py / test_overfit.py
190
- """
191
- # Construct prompt exactly as in test_overfit.py:
192
- # prompt = f"Instruction: Generate motion for word '{sample['word']}' with variant '{sample['participant_id']}'.\nMotion: "
193
 
194
- # Get a valid participant ID if possible
195
- word_lower = prompt_text.lower().strip()
196
- variants = VARIANT_MAP.get(word_lower, [])
 
197
 
198
- if variants:
199
- pid = random.choice(variants)
200
- print(f"Selected variant '{pid}' for word '{prompt_text}'")
201
- else:
202
- # Fallback to 'unknown' or a common PID if known (e.g., P1)
203
- pid = "unknown"
204
- print(f"No variants found for '{prompt_text}', using '{pid}'")
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
- prompt = f"Instruction: Generate motion for word '{prompt_text}' with variant '{pid}'.\nMotion: "
 
 
 
 
 
 
207
 
208
- print(f"Input Prompt:\n{prompt}")
 
209
 
210
- inputs = tokenizer(prompt, return_tensors="pt").to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
- with torch.no_grad():
213
- output = model.generate(
214
- **inputs,
215
- max_new_tokens=100,
216
- do_sample=True,
217
- temperature=INFERENCE_TEMPERATURE,
218
- top_k=INFERENCE_TOP_K,
219
- repetition_penalty=INFERENCE_REPETITION_PENALTY,
220
- pad_token_id=tokenizer.pad_token_id,
221
- eos_token_id=M_END_ID, # Stop at <M_END>
222
- early_stopping=True
223
- )
224
 
225
- decoded = tokenizer.decode(output[0], skip_special_tokens=False)
 
 
 
 
 
 
226
 
227
- # Parse output to extract just the motion part
228
- # We expect: ... \nMotion: <M_START> <motion_...> ... <M_END>
229
- if "Motion: " in decoded:
230
- motion_part = decoded.split("Motion: ")[-1]
231
- else:
232
- motion_part = decoded
233
-
234
- return motion_part.strip()
235
 
236
- def generate_motion_app(text_prompt):
237
- if not text_prompt:
238
- return None, "Please enter a prompt."
 
 
 
239
 
240
- if MODEL is None:
241
- try:
242
- init_model()
243
- except Exception as e:
244
- return None, f"Model Initialization Failed: {e}"
245
-
246
- device = MODEL.device
247
- print(f"Generating for: {text_prompt}")
248
 
249
- try:
250
- generated_sequence = generate_motion_simple(MODEL, TOKENIZER, text_prompt, device)
251
- print("Generated sequence (raw):", generated_sequence)
252
-
253
- # Extract tokens for visualization
254
- # Logic from metrics.py: _extract_motion_tokens_from_sequence
255
- # Expect tokens like <M123> or <motion_123>
256
- # The generation might include M_START/M_END.
257
-
258
- # Clean up for visualization input
259
- # We need a string of tokens.
260
- # If the output is like "<M_START> <motion_1> <motion_2> <M_END>", we pass that.
261
- # visualize.py's parse_motion_tokens handles <motion_ID> regex.
262
- # BUT visualize.py expects either "123 456" OR "<motion_123> <motion_456>"
263
- # It does NOT explicitly handle <M123> which is what we might have here if M_START was used.
264
- # Let's convert <M123> to space-separated integers for safety.
265
-
266
- # Extract integers from <M123> or <motion_123>
267
- # generated_sequence is raw string from tokenizer decode
268
-
269
- import re
270
- # Try <M123> format (test_overfit style)
271
- m_tokens = re.findall(r'<M(\d+)>', generated_sequence)
272
- if not m_tokens:
273
- # Try <motion_123> format
274
- m_tokens = re.findall(r'<motion_(\d+)>', generated_sequence)
275
-
276
- if m_tokens:
277
- # Reconstruct as space-separated string for visualize.py
278
- tokens_for_vis = " ".join(m_tokens)
279
- else:
280
- # Fallback to raw string if regex failed (visualize.py might handle other formats)
281
- tokens_for_vis = generated_sequence
282
-
283
- print(f"Tokens for visualization: {tokens_for_vis[:50]}...")
284
-
285
- except Exception as e:
286
- return None, f"Generation Error: {e}"
287
 
288
- # Visualization
289
- try:
290
- # Ensure paths for VQ-VAE and SMPL-X
291
- data_dir = os.environ.get("DATA_DIR", "data")
292
- vqvae_ckpt = os.path.join(data_dir, "vqvae_model.pt")
293
- stats_path = os.path.join(data_dir, "vqvae_stats.pt")
294
- smplx_dir = os.path.join(data_dir, "smplx_models")
295
-
296
- # Check existence
297
- missing = []
298
- if not os.path.exists(vqvae_ckpt): missing.append(vqvae_ckpt)
299
- if not os.path.exists(stats_path): missing.append(stats_path)
300
- if not os.path.exists(smplx_dir): missing.append(smplx_dir)
301
 
302
- if missing:
303
- return None, f"Missing visualization files in {data_dir}: {missing}. Please ensure they are uploaded to the Space."
 
 
 
 
 
 
 
 
304
 
305
- # Output to a temporary file
306
- output_html = "temp_viz.html"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
 
308
- fig = visualize(
309
- tokens=tokens_for_vis,
310
- vqvae_ckpt=vqvae_ckpt,
311
- stats_path=stats_path,
312
- smplx_dir=smplx_dir,
313
- output_html=output_html,
314
- title=f"Motion: {text_prompt}",
315
- fps=20
316
- )
 
 
317
 
318
- if fig is None:
319
- return None, "Visualization failed (no frames produced)."
320
-
321
- # Count tokens for display
322
- matches = re.findall(r'<motion_(\d+)>', tokens_for_vis)
323
- # Also check for <M...> format just in case
324
- if not matches:
325
- matches = re.findall(r'<M(\d+)>', tokens_for_vis)
326
-
327
- num_tokens = len(matches)
328
 
329
- return fig, f"Success! Generated tokens length: {num_tokens}. Sequence: {tokens_for_vis[:100]}..."
 
330
 
331
- except Exception as e:
332
- return None, f"Visualization Error: {e}"
333
-
334
-
335
- # Gradio UI
336
- with gr.Interface(
337
- fn=generate_motion_app,
338
- inputs=gr.Textbox(label="Enter Motion Prompt", placeholder="e.g. walking forward"),
339
- outputs=[
340
- gr.Plot(label="Motion Visualization"),
341
- gr.Textbox(label="Status/Output")
342
- ],
343
- title="SignMotionGPT Demo",
344
- description="Generate Sign Language/Motion Avatars from Text. Using model checkpoint: epoch 30."
345
- ) as demo:
346
- pass
347
 
348
  if __name__ == "__main__":
349
- demo.launch()
 
 
 
 
1
+ """
2
+ Gradio Interface for SignMotionGPT (HF Spaces Compatible)
3
+ """
4
  import os
5
  import sys
 
6
  import re
7
  import json
8
  import random
9
+ import argparse
10
+ import time
11
+ import warnings
12
  from pathlib import Path
13
 
14
+ import torch
15
+ import numpy as np
16
+ import gradio as gr
17
+ import plotly.graph_objects as go
18
+ from plotly.subplots import make_subplots
19
+ import smplx
20
 
21
+ from transformers import AutoModelForCausalLM, AutoTokenizer
22
+
23
+ warnings.filterwarnings("ignore")
24
+
25
+ # =====================================================================
26
+ # Configuration & Paths
27
+ # =====================================================================
28
+ # Setup directories for HF Spaces
29
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
30
+ OUTPUT_DIR = os.path.join(BASE_DIR, "generated_outputs")
31
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
32
+
33
+ # Add project root to path
34
+ sys.path.append(BASE_DIR)
 
35
 
36
+ HF_REPO_ID = os.environ.get("HF_REPO_ID", "rdz-falcon/SignMotionGPTfit-archive")
37
+ HF_SUBFOLDER = os.environ.get("HF_SUBFOLDER", "stage2_v2/epoch-030")
 
 
 
38
 
39
+ DATA_DIR = os.environ.get("DATA_DIR", os.path.join(BASE_DIR, "data"))
40
+ DATASET_PATH = os.environ.get("DATASET_PATH", os.path.join(BASE_DIR, "enriched_dataset.json"))
41
+
42
+ VQVAE_CHECKPOINT = os.environ.get("VQVAE_CHECKPOINT", os.path.join(DATA_DIR, "vqvae_model.pt"))
43
+ STATS_PATH = os.environ.get("VQVAE_STATS_PATH", os.path.join(DATA_DIR, "vqvae_stats.pt"))
44
+ SMPLX_MODEL_DIR = os.environ.get("SMPLX_MODEL_DIR", os.path.join(DATA_DIR, "smplx_models"))
45
+
46
+ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
47
+
48
+ # Model Config
49
+ M_START = "<M_START>"
50
+ M_END = "<M_END>"
51
+ PAD_TOKEN = "<PAD>"
52
  INFERENCE_TEMPERATURE = 0.7
53
  INFERENCE_TOP_K = 50
54
  INFERENCE_REPETITION_PENALTY = 1.2
 
 
55
 
56
+ SMPL_DIM = 182
57
+ CODEBOOK_SIZE = 512
58
+ CODE_DIM = 512
59
+ VQ_ARGS = dict(
60
+ width=512, depth=3, down_t=2, stride_t=2,
61
+ dilation_growth_rate=3, activation='relu', norm=None, quantizer="ema_reset"
62
+ )
63
 
64
+ PARAM_DIMS = [10, 63, 45, 45, 3, 10, 3, 3]
65
+ PARAM_NAMES = ["betas", "body_pose", "left_hand_pose", "right_hand_pose",
66
+ "trans", "expression", "jaw_pose", "eye_pose"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
+ # =====================================================================
69
+ # Import VQ-VAE architecture
70
+ # =====================================================================
71
+ try:
72
+ # Try importing from local project structure
73
+ from mGPT.archs.mgpt_vq import VQVae
74
+ except ImportError:
75
  try:
76
+ # Fallback for flat structure
77
+ from archs.mgpt_vq import VQVae
78
+ except ImportError:
79
+ print("⚠️ Warning: Could not import VQVae architecture.")
80
+ VQVae = None
81
+
82
+ # =====================================================================
83
+ # Global Cache
84
+ # =====================================================================
85
+ _model_cache = {
86
+ "llm_model": None,
87
+ "llm_tokenizer": None,
88
+ "vqvae_model": None,
89
+ "smplx_model": None,
90
+ "stats": (None, None),
91
+ "initialized": False
92
+ }
93
 
94
+ _word_pid_map = {}
95
+ _example_cache = {}
96
+
97
+ # =====================================================================
98
+ # Dataset Loading
99
+ # =====================================================================
100
+ def load_word_pid_mapping():
101
+ global _word_pid_map
102
+ if not os.path.exists(DATASET_PATH):
103
+ # Fallback defaults if dataset missing
104
+ _word_pid_map = {"push": ["P40"], "send": ["P40"]}
105
  return
106
 
107
+ try:
108
+ with open(DATASET_PATH, 'r', encoding='utf-8') as f:
109
+ data = json.load(f)
110
+
111
+ mapping = {}
112
+ for entry in data:
113
+ word = (entry.get('word') or entry.get('text_query', '')).lower().strip()
114
+ pid = entry.get('participant_id')
115
+ if word and pid:
116
+ mapping.setdefault(word, set()).add(str(pid))
117
+
118
+ _word_pid_map = {k: sorted(list(v)) for k, v in mapping.items()}
119
+ print(f"Loaded {len(_word_pid_map)} words from dataset")
120
+ except Exception as e:
121
+ print(f"Error loading dataset: {e}")
122
 
123
+ def get_random_pids_for_word(word: str, count: int = 2) -> list:
124
+ pids = _word_pid_map.get(word.lower().strip(), [])
125
+ if not pids: return []
126
+ if len(pids) <= count: return pids
127
+ return random.sample(pids, count)
 
 
128
 
129
+ # =====================================================================
130
+ # Models
131
+ # =====================================================================
132
+ class MotionGPT_VQVAE_Wrapper(torch.nn.Module):
133
+ def __init__(self, smpl_dim=SMPL_DIM, codebook_size=CODEBOOK_SIZE, code_dim=CODE_DIM, **kwargs):
134
+ super().__init__()
135
+ if VQVae is None: raise RuntimeError("VQVae architecture missing")
136
+ self.vqvae = VQVae(nfeats=smpl_dim, code_num=codebook_size, code_dim=code_dim, output_emb_width=code_dim, **kwargs)
137
 
138
+ def initialize_models():
139
+ global _model_cache
140
+ if _model_cache["initialized"]: return
141
+
142
+ print("Initializing Models...")
143
+ load_word_pid_mapping()
144
+
145
+ # LLM
146
+ print(f"Loading LLM: {HF_REPO_ID}")
147
+ tok = AutoTokenizer.from_pretrained(HF_REPO_ID, subfolder=HF_SUBFOLDER, trust_remote_code=True)
148
+ model = AutoModelForCausalLM.from_pretrained(HF_REPO_ID, subfolder=HF_SUBFOLDER, trust_remote_code=True)
149
+ if tok.pad_token is None: tok.add_special_tokens({"pad_token": PAD_TOKEN})
150
+ model.resize_token_embeddings(len(tok))
151
+ model.to(DEVICE).eval()
152
+ _model_cache["llm_model"] = model
153
+ _model_cache["llm_tokenizer"] = tok
154
 
155
+ # VQ-VAE
156
+ if os.path.exists(VQVAE_CHECKPOINT):
157
+ vq = MotionGPT_VQVAE_Wrapper(**VQ_ARGS).to(DEVICE)
158
+ ckpt = torch.load(VQVAE_CHECKPOINT, map_location=DEVICE)
159
+ vq.load_state_dict(ckpt.get('model_state_dict', ckpt), strict=False)
160
+ vq.eval()
161
+ _model_cache["vqvae_model"] = vq
162
 
163
+ # Stats
164
+ if os.path.exists(STATS_PATH):
165
+ st = torch.load(STATS_PATH, map_location='cpu')
166
+ _model_cache["stats"] = (st.get('mean', 0), st.get('std', 1))
167
 
168
+ # SMPL-X
169
+ if os.path.exists(SMPLX_MODEL_DIR):
170
+ _model_cache["smplx_model"] = smplx.SMPLX(
171
+ model_path=SMPLX_MODEL_DIR, model_type='smplx', gender='neutral', use_pca=False,
172
+ create_global_orient=True, create_body_pose=True, create_betas=True,
173
+ create_expression=True, create_jaw_pose=True, create_left_hand_pose=True,
174
+ create_right_hand_pose=True, create_transl=True
175
+ ).to(DEVICE)
176
+
177
+ _model_cache["initialized"] = True
178
+ print("Models Initialized.")
179
+
180
+ # =====================================================================
181
+ # Generation Logic
182
+ # =====================================================================
183
+ def generate_motion_tokens(word: str, variant: str) -> str:
184
+ model, tok = _model_cache["llm_model"], _model_cache["llm_tokenizer"]
185
+ prompt = f"Instruction: Generate motion for word '{word}' with variant '{variant}'.\nMotion: "
186
+ inputs = tok(prompt, return_tensors="pt").to(DEVICE)
187
+ with torch.no_grad():
188
+ out = model.generate(
189
+ **inputs, max_new_tokens=100, do_sample=True,
190
+ temperature=INFERENCE_TEMPERATURE, top_k=INFERENCE_TOP_K,
191
+ repetition_penalty=INFERENCE_REPETITION_PENALTY,
192
+ eos_token_id=tok.convert_tokens_to_ids(M_END)
193
+ )
194
+ decoded = tok.decode(out[0], skip_special_tokens=False)
195
+ return decoded.split("Motion: ")[-1].strip() if "Motion: " in decoded else decoded.strip()
196
 
197
+ def decode_tokens_to_params(tokens: list) -> np.ndarray:
198
+ vq, (mean, std) = _model_cache["vqvae_model"], _model_cache["stats"]
199
+ if not vq or not tokens: return np.zeros((0, SMPL_DIM))
200
+
201
+ idx = torch.tensor(tokens, dtype=torch.long, device=DEVICE).unsqueeze(0)
202
+ with torch.no_grad():
203
+ emb = vq.vqvae.quantizer.codebook[idx].permute(0, 2, 1)
204
+ decoded = vq.vqvae.decoder(emb)
205
+ params = vq.vqvae.postprocess(decoded).squeeze(0).cpu().numpy()
206
+
207
+ if mean is not None: params = (params * std) + mean
208
+ return params
209
 
210
+ def params_to_vertices(params: np.ndarray):
211
+ smpl = _model_cache["smplx_model"]
212
+ if not smpl or params.shape[0] == 0: return None, None
 
 
 
213
 
214
+ # Split params (simplified logic for brevity)
215
+ dims = [10, 63, 45, 45, 3, 10, 3, 3]
216
+ split_params = np.split(params, np.cumsum(dims)[:-1], axis=1)
217
+ tensor_parts = [torch.from_numpy(p).to(DEVICE).float() for p in split_params]
218
 
219
+ # Batch processing to avoid OOM
220
+ verts_list = []
221
+ for i in range(0, params.shape[0], 32):
222
+ batch = [t[i:i+32] for t in tensor_parts]
223
+ with torch.no_grad():
224
+ # Handle global_orient vs body_pose split
225
+ bp_full = batch[1]
226
+ go = bp_full[:, :3]
227
+ bp = bp_full[:, 3:]
228
+
229
+ out = smpl(
230
+ betas=batch[0], global_orient=go, body_pose=bp,
231
+ left_hand_pose=batch[2], right_hand_pose=batch[3],
232
+ transl=batch[4], expression=batch[5],
233
+ jaw_pose=batch[6], leye_pose=batch[7], reye_pose=batch[7]
234
+ )
235
+ verts_list.append(out.vertices.cpu().numpy())
236
+
237
+ return np.concatenate(verts_list, axis=0), smpl.faces
238
 
239
+ # =====================================================================
240
+ # Visualization (Plotly -> HTML)
241
+ # =====================================================================
242
+ def create_side_by_side_html(verts1, faces1, verts2, faces2, title1="", title2="", fps=20):
243
+ # Truncate to matching length
244
+ min_len = min(len(verts1), len(verts2))
245
+ v1, v2 = verts1[:min_len], verts2[:min_len]
246
 
247
+ fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'scene'}, {'type': 'scene'}]],
248
+ subplot_titles=[title1, title2])
249
 
250
+ # Add first frame
251
+ for col, v, c in [(1, v1, '#6FA8DC'), (2, v2, '#93C47D')]:
252
+ fig.add_trace(go.Mesh3d(
253
+ x=v[0,:,0], y=v[0,:,1], z=v[0,:,2],
254
+ i=faces1[:,0], j=faces1[:,1], k=faces1[:,2],
255
+ color=c, opacity=0.8, flatshading=True
256
+ ), row=1, col=col)
257
+
258
+ # Frames
259
+ frames = []
260
+ for t in range(min_len):
261
+ frames.append(go.Frame(data=[
262
+ go.Mesh3d(x=v1[t,:,0], y=v1[t,:,1], z=v1[t,:,2]),
263
+ go.Mesh3d(x=v2[t,:,0], y=v2[t,:,1], z=v2[t,:,2])
264
+ ], name=str(t)))
265
 
266
+ fig.frames = frames
 
 
 
 
 
 
 
 
 
 
 
267
 
268
+ # Animation settings
269
+ fig.update_layout(
270
+ updatemenus=[dict(type="buttons", buttons=[dict(label="Play", method="animate", args=[None])])],
271
+ scene=dict(aspectmode='data', xaxis_visible=False, yaxis_visible=False, zaxis_visible=False),
272
+ scene2=dict(aspectmode='data', xaxis_visible=False, yaxis_visible=False, zaxis_visible=False),
273
+ height=500, margin=dict(l=0, r=0, t=30, b=0)
274
+ )
275
 
276
+ return fig.to_html(include_plotlyjs='cdn', full_html=True)
 
 
 
 
 
 
 
277
 
278
+ def create_single_html(verts, faces, title="", fps=20):
279
+ fig = go.Figure(go.Mesh3d(
280
+ x=verts[0,:,0], y=verts[0,:,1], z=verts[0,:,2],
281
+ i=faces[:,0], j=faces[:,1], k=faces[:,2],
282
+ color='#6FA8DC', opacity=0.8, flatshading=True
283
+ ))
284
 
285
+ frames = [go.Frame(data=[go.Mesh3d(x=verts[t,:,0], y=verts[t,:,1], z=verts[t,:,2])], name=str(t))
286
+ for t in range(len(verts))]
287
+ fig.frames = frames
 
 
 
 
 
288
 
289
+ fig.update_layout(
290
+ title=title,
291
+ updatemenus=[dict(type="buttons", buttons=[dict(label="Play", method="animate", args=[None])])],
292
+ scene=dict(aspectmode='data', xaxis_visible=False, yaxis_visible=False, zaxis_visible=False),
293
+ height=500
294
+ )
295
+ return fig.to_html(include_plotlyjs='cdn', full_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
 
297
+ # =====================================================================
298
+ # Main Logic with File Saving
299
+ # =====================================================================
300
+ def save_and_get_iframe(html_content, filename_suffix=""):
301
+ """Saves HTML to disk and returns an Iframe pointing to it."""
302
+ filename = f"vis_{int(time.time())}_{filename_suffix}.html"
303
+ filepath = os.path.join(OUTPUT_DIR, filename)
304
+
305
+ with open(filepath, "w", encoding="utf-8") as f:
306
+ f.write(html_content)
 
 
 
307
 
308
+ # Use the /file= route to serve the absolute path
309
+ # allow-same-origin and allow-scripts are crucial for Plotly
310
+ iframe = f"""
311
+ <iframe src="/file={filepath}"
312
+ width="100%" height="550px"
313
+ style="border:none; background:#fafafa;"
314
+ sandbox="allow-scripts allow-same-origin">
315
+ </iframe>
316
+ """
317
+ return iframe
318
 
319
+ def process_word(word: str):
320
+ if not word.strip(): return None, ""
321
+
322
+ pids = get_random_pids_for_word(word, 2)
323
+ if not pids: pids = ["Unknown", "Unknown"]
324
+ elif len(pids) == 1: pids = [pids[0], pids[0]]
325
+
326
+ # Generate 1
327
+ raw1 = generate_motion_tokens(word, pids[0])
328
+ toks1 = [int(x) for x in re.findall(r'<M(\d+)>', raw1)]
329
+ verts1, faces = params_to_vertices(decode_tokens_to_params(toks1))
330
+
331
+ # Generate 2
332
+ raw2 = generate_motion_tokens(word, pids[1])
333
+ toks2 = [int(x) for x in re.findall(r'<M(\d+)>', raw2)]
334
+ verts2, _ = params_to_vertices(decode_tokens_to_params(toks2))
335
+
336
+ if verts1 is not None and verts2 is not None:
337
+ html = create_side_by_side_html(verts1, faces, verts2, faces, title1=f"{pids[0]}", title2=f"{pids[1]}")
338
+ elif verts1 is not None:
339
+ html = create_single_html(verts1, faces, title=f"{pids[0]}")
340
+ else:
341
+ return "<div>Error generating motion</div>", ""
342
 
343
+ iframe = save_and_get_iframe(html, f"{word}")
344
+ return iframe, f"[{pids[0]}] {len(toks1)} toks\n[{pids[1]}] {len(toks2)} toks"
345
+
346
+ # =====================================================================
347
+ # UI
348
+ # =====================================================================
349
+ def create_ui():
350
+ custom_css = ".gradio-container { max-width: 1400px !important; }"
351
+
352
+ with gr.Blocks(css=custom_css, title="SignMotionGPT") as demo:
353
+ gr.Markdown("# SignMotionGPT Comparison Demo")
354
 
355
+ with gr.Row():
356
+ with gr.Column(scale=1):
357
+ txt_input = gr.Textbox(label="Word", placeholder="push")
358
+ btn = gr.Button("Generate Comparison", variant="primary")
359
+ out_toks = gr.Textbox(label="Details", lines=4)
360
+
361
+ with gr.Column(scale=2):
362
+ out_html = gr.HTML(label="Visualization")
363
+
364
+ btn.click(process_word, inputs=txt_input, outputs=[out_html, out_toks])
365
 
366
+ # Initialize
367
+ initialize_models()
368
 
369
+ return demo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
 
371
  if __name__ == "__main__":
372
+ demo = create_ui()
373
+ print(f"🚀 Launching. Output dir: {OUTPUT_DIR}")
374
+ # allowed_paths=[OUTPUT_DIR] is the magic key for HF Spaces
375
+ demo.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=[OUTPUT_DIR])