rdz-falcon committed on
Commit
bb80c91
·
verified ·
1 Parent(s): 598e02c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +453 -212
app.py CHANGED
@@ -1,18 +1,19 @@
1
- """
2
- Gradio Interface for SignMotionGPT (HF Spaces Compatible)
3
- """
4
  import os
5
  import sys
6
  import re
7
  import json
8
  import random
9
  import argparse
10
- import time
11
  import warnings
 
 
12
  from pathlib import Path
13
 
14
  import torch
15
  import numpy as np
 
 
 
16
  import gradio as gr
17
  import plotly.graph_objects as go
18
  from plotly.subplots import make_subplots
@@ -23,36 +24,36 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
23
  warnings.filterwarnings("ignore")
24
 
25
  # =====================================================================
26
- # Configuration & Paths
27
  # =====================================================================
28
- # Setup directories for HF Spaces
29
- BASE_DIR = os.path.dirname(os.path.abspath(__file__))
30
- OUTPUT_DIR = os.path.join(BASE_DIR, "generated_outputs")
31
- os.makedirs(OUTPUT_DIR, exist_ok=True)
32
-
33
- # Add project root to path
34
- sys.path.append(BASE_DIR)
35
-
36
  HF_REPO_ID = os.environ.get("HF_REPO_ID", "rdz-falcon/SignMotionGPTfit-archive")
37
  HF_SUBFOLDER = os.environ.get("HF_SUBFOLDER", "stage2_v2/epoch-030")
38
 
39
- DATA_DIR = os.environ.get("DATA_DIR", os.path.join(BASE_DIR, "data"))
40
- DATASET_PATH = os.environ.get("DATASET_PATH", os.path.join(BASE_DIR, "enriched_dataset.json"))
 
 
41
 
42
- VQVAE_CHECKPOINT = os.environ.get("VQVAE_CHECKPOINT", os.path.join(DATA_DIR, "vqvae_model.pt"))
43
- STATS_PATH = os.environ.get("VQVAE_STATS_PATH", os.path.join(DATA_DIR, "vqvae_stats.pt"))
44
- SMPLX_MODEL_DIR = os.environ.get("SMPLX_MODEL_DIR", os.path.join(DATA_DIR, "smplx_models"))
 
 
45
 
46
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
47
 
48
- # Model Config
49
  M_START = "<M_START>"
50
  M_END = "<M_END>"
51
  PAD_TOKEN = "<PAD>"
 
 
52
  INFERENCE_TEMPERATURE = 0.7
53
  INFERENCE_TOP_K = 50
54
  INFERENCE_REPETITION_PENALTY = 1.2
55
 
 
56
  SMPL_DIM = 182
57
  CODEBOOK_SIZE = 512
58
  CODE_DIM = 512
@@ -65,19 +66,79 @@ PARAM_DIMS = [10, 63, 45, 45, 3, 10, 3, 3]
65
  PARAM_NAMES = ["betas", "body_pose", "left_hand_pose", "right_hand_pose",
66
  "trans", "expression", "jaw_pose", "eye_pose"]
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  # =====================================================================
69
  # Import VQ-VAE architecture
70
  # =====================================================================
 
 
 
71
  try:
72
- # Try importing from local project structure
73
  from mGPT.archs.mgpt_vq import VQVae
74
- except ImportError:
75
- try:
76
- # Fallback for flat structure
77
- from archs.mgpt_vq import VQVae
78
- except ImportError:
79
- print("⚠️ Warning: Could not import VQVae architecture.")
80
- VQVae = None
81
 
82
  # =====================================================================
83
  # Global Cache
@@ -91,8 +152,8 @@ _model_cache = {
91
  "initialized": False
92
  }
93
 
94
- _word_pid_map = {}
95
- _example_cache = {}
96
 
97
  # =====================================================================
98
  # Dataset Loading
@@ -100,276 +161,456 @@ _example_cache = {}
100
  def load_word_pid_mapping():
101
  global _word_pid_map
102
  if not os.path.exists(DATASET_PATH):
103
- # Fallback defaults if dataset missing
104
- _word_pid_map = {"push": ["P40"], "send": ["P40"]}
105
  return
106
-
 
107
  try:
108
  with open(DATASET_PATH, 'r', encoding='utf-8') as f:
109
  data = json.load(f)
110
 
111
- mapping = {}
112
  for entry in data:
113
- word = (entry.get('word') or entry.get('text_query', '')).lower().strip()
114
- pid = entry.get('participant_id')
115
  if word and pid:
116
- mapping.setdefault(word, set()).add(str(pid))
 
 
117
 
118
- _word_pid_map = {k: sorted(list(v)) for k, v in mapping.items()}
119
- print(f"Loaded {len(_word_pid_map)} words from dataset")
 
120
  except Exception as e:
121
  print(f"Error loading dataset: {e}")
122
 
 
 
 
123
  def get_random_pids_for_word(word: str, count: int = 2) -> list:
124
- pids = _word_pid_map.get(word.lower().strip(), [])
125
  if not pids: return []
126
  if len(pids) <= count: return pids
127
  return random.sample(pids, count)
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  # =====================================================================
130
- # Models
131
  # =====================================================================
132
  class MotionGPT_VQVAE_Wrapper(torch.nn.Module):
133
  def __init__(self, smpl_dim=SMPL_DIM, codebook_size=CODEBOOK_SIZE, code_dim=CODE_DIM, **kwargs):
134
  super().__init__()
135
- if VQVae is None: raise RuntimeError("VQVae architecture missing")
136
- self.vqvae = VQVae(nfeats=smpl_dim, code_num=codebook_size, code_dim=code_dim, output_emb_width=code_dim, **kwargs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
  def initialize_models():
139
  global _model_cache
140
  if _model_cache["initialized"]: return
141
 
142
  print("Initializing Models...")
143
- load_word_pid_mapping()
144
-
145
- # LLM
146
- print(f"Loading LLM: {HF_REPO_ID}")
147
- tok = AutoTokenizer.from_pretrained(HF_REPO_ID, subfolder=HF_SUBFOLDER, trust_remote_code=True)
148
- model = AutoModelForCausalLM.from_pretrained(HF_REPO_ID, subfolder=HF_SUBFOLDER, trust_remote_code=True)
149
- if tok.pad_token is None: tok.add_special_tokens({"pad_token": PAD_TOKEN})
150
- model.resize_token_embeddings(len(tok))
151
- model.to(DEVICE).eval()
152
- _model_cache["llm_model"] = model
153
- _model_cache["llm_tokenizer"] = tok
154
-
155
- # VQ-VAE
156
- if os.path.exists(VQVAE_CHECKPOINT):
157
- vq = MotionGPT_VQVAE_Wrapper(**VQ_ARGS).to(DEVICE)
158
- ckpt = torch.load(VQVAE_CHECKPOINT, map_location=DEVICE,weights_only=False)
159
- vq.load_state_dict(ckpt.get('model_state_dict', ckpt), strict=False)
160
- vq.eval()
161
- _model_cache["vqvae_model"] = vq
162
 
163
- # Stats
164
- if os.path.exists(STATS_PATH):
165
- st = torch.load(STATS_PATH, map_location='cpu')
166
- _model_cache["stats"] = (st.get('mean', 0), st.get('std', 1))
 
167
 
168
- # SMPL-X
169
- if os.path.exists(SMPLX_MODEL_DIR):
170
- _model_cache["smplx_model"] = smplx.SMPLX(
171
- model_path=SMPLX_MODEL_DIR, model_type='smplx', gender='neutral', use_pca=False,
172
- create_global_orient=True, create_body_pose=True, create_betas=True,
173
- create_expression=True, create_jaw_pose=True, create_left_hand_pose=True,
174
- create_right_hand_pose=True, create_transl=True
175
- ).to(DEVICE)
176
-
177
  _model_cache["initialized"] = True
178
- print("Models Initialized.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
  # =====================================================================
181
- # Generation Logic
182
  # =====================================================================
183
  def generate_motion_tokens(word: str, variant: str) -> str:
184
- model, tok = _model_cache["llm_model"], _model_cache["llm_tokenizer"]
 
 
 
185
  prompt = f"Instruction: Generate motion for word '{word}' with variant '{variant}'.\nMotion: "
186
- inputs = tok(prompt, return_tensors="pt").to(DEVICE)
 
187
  with torch.no_grad():
188
- out = model.generate(
189
  **inputs, max_new_tokens=100, do_sample=True,
190
  temperature=INFERENCE_TEMPERATURE, top_k=INFERENCE_TOP_K,
191
  repetition_penalty=INFERENCE_REPETITION_PENALTY,
192
- eos_token_id=tok.convert_tokens_to_ids(M_END)
 
 
193
  )
194
- decoded = tok.decode(out[0], skip_special_tokens=False)
195
- return decoded.split("Motion: ")[-1].strip() if "Motion: " in decoded else decoded.strip()
 
 
 
 
 
 
 
 
196
 
197
  def decode_tokens_to_params(tokens: list) -> np.ndarray:
198
- vq, (mean, std) = _model_cache["vqvae_model"], _model_cache["stats"]
199
- if not vq or not tokens: return np.zeros((0, SMPL_DIM))
 
200
 
201
  idx = torch.tensor(tokens, dtype=torch.long, device=DEVICE).unsqueeze(0)
202
  with torch.no_grad():
203
- emb = vq.vqvae.quantizer.codebook[idx].permute(0, 2, 1)
204
- decoded = vq.vqvae.decoder(emb)
205
- params = vq.vqvae.postprocess(decoded).squeeze(0).cpu().numpy()
206
-
207
- if mean is not None: params = (params * std) + mean
208
- return params
 
 
 
 
 
 
209
 
210
- def params_to_vertices(params: np.ndarray):
211
- smpl = _model_cache["smplx_model"]
212
- if not smpl or params.shape[0] == 0: return None, None
 
 
 
 
213
 
214
- # Split params (simplified logic for brevity)
215
- dims = [10, 63, 45, 45, 3, 10, 3, 3]
216
- split_params = np.split(params, np.cumsum(dims)[:-1], axis=1)
217
- tensor_parts = [torch.from_numpy(p).to(DEVICE).float() for p in split_params]
218
 
219
- # Batch processing to avoid OOM
220
- verts_list = []
221
- for i in range(0, params.shape[0], 32):
222
- batch = [t[i:i+32] for t in tensor_parts]
223
- with torch.no_grad():
224
- # Handle global_orient vs body_pose split
225
- bp_full = batch[1]
226
- go = bp_full[:, :3]
227
- bp = bp_full[:, 3:]
228
-
229
- out = smpl(
230
- betas=batch[0], global_orient=go, body_pose=bp,
231
- left_hand_pose=batch[2], right_hand_pose=batch[3],
232
- transl=batch[4], expression=batch[5],
233
- jaw_pose=batch[6], leye_pose=batch[7], reye_pose=batch[7]
234
- )
235
- verts_list.append(out.vertices.cpu().numpy())
236
 
237
- return np.concatenate(verts_list, axis=0), smpl.faces
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
239
  # =====================================================================
240
- # Visualization (Plotly -> HTML)
241
  # =====================================================================
242
- def create_side_by_side_html(verts1, faces1, verts2, faces2, title1="", title2="", fps=20):
243
- # Truncate to matching length
244
- min_len = min(len(verts1), len(verts2))
245
- v1, v2 = verts1[:min_len], verts2[:min_len]
246
 
247
- fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'scene'}, {'type': 'scene'}]],
248
- subplot_titles=[title1, title2])
 
249
 
250
- # Add first frame
251
- for col, v, c in [(1, v1, '#6FA8DC'), (2, v2, '#93C47D')]:
252
- fig.add_trace(go.Mesh3d(
253
- x=v[0,:,0], y=v[0,:,1], z=v[0,:,2],
254
- i=faces1[:,0], j=faces1[:,1], k=faces1[:,2],
255
- color=c, opacity=0.8, flatshading=True
256
- ), row=1, col=col)
257
-
258
- # Frames
259
- frames = []
260
- for t in range(min_len):
261
- frames.append(go.Frame(data=[
262
- go.Mesh3d(x=v1[t,:,0], y=v1[t,:,1], z=v1[t,:,2]),
263
- go.Mesh3d(x=v2[t,:,0], y=v2[t,:,1], z=v2[t,:,2])
264
- ], name=str(t)))
265
 
266
- fig.frames = frames
267
 
268
- # Animation settings
 
 
 
 
 
 
 
 
 
 
269
  fig.update_layout(
270
- updatemenus=[dict(type="buttons", buttons=[dict(label="Play", method="animate", args=[None])])],
271
- scene=dict(aspectmode='data', xaxis_visible=False, yaxis_visible=False, zaxis_visible=False),
272
- scene2=dict(aspectmode='data', xaxis_visible=False, yaxis_visible=False, zaxis_visible=False),
273
- height=500, margin=dict(l=0, r=0, t=30, b=0)
274
  )
275
-
276
  return fig.to_html(include_plotlyjs='cdn', full_html=True)
277
 
278
- def create_single_html(verts, faces, title="", fps=20):
279
- fig = go.Figure(go.Mesh3d(
280
- x=verts[0,:,0], y=verts[0,:,1], z=verts[0,:,2],
281
- i=faces[:,0], j=faces[:,1], k=faces[:,2],
282
- color='#6FA8DC', opacity=0.8, flatshading=True
283
- ))
 
 
 
 
 
 
 
 
 
 
 
 
284
 
285
- frames = [go.Frame(data=[go.Mesh3d(x=verts[t,:,0], y=verts[t,:,1], z=verts[t,:,2])], name=str(t))
286
- for t in range(len(verts))]
287
  fig.frames = frames
288
 
 
 
289
  fig.update_layout(
290
- title=title,
291
- updatemenus=[dict(type="buttons", buttons=[dict(label="Play", method="animate", args=[None])])],
292
- scene=dict(aspectmode='data', xaxis_visible=False, yaxis_visible=False, zaxis_visible=False),
293
- height=500
294
  )
295
  return fig.to_html(include_plotlyjs='cdn', full_html=True)
296
 
 
 
 
 
 
 
 
 
 
 
297
  # =====================================================================
298
- # Main Logic with File Saving
299
  # =====================================================================
300
- def save_and_get_iframe(html_content, filename_suffix=""):
301
- """Saves HTML to disk and returns an Iframe pointing to it."""
302
- filename = f"vis_{int(time.time())}_{filename_suffix}.html"
303
- filepath = os.path.join(OUTPUT_DIR, filename)
304
-
305
- with open(filepath, "w", encoding="utf-8") as f:
306
- f.write(html_content)
307
-
308
- # Use the /file= route to serve the absolute path
309
- # allow-same-origin and allow-scripts are crucial for Plotly
310
- iframe = f"""
311
- <iframe src="/file={filepath}"
312
- width="100%" height="550px"
313
- style="border:none; background:#fafafa;"
314
- sandbox="allow-scripts allow-same-origin">
315
- </iframe>
316
- """
317
- return iframe
318
 
319
- def process_word(word: str):
320
- if not word.strip(): return None, ""
321
 
 
322
  pids = get_random_pids_for_word(word, 2)
323
- if not pids: pids = ["Unknown", "Unknown"]
324
- elif len(pids) == 1: pids = [pids[0], pids[0]]
325
 
326
- # Generate 1
327
- raw1 = generate_motion_tokens(word, pids[0])
328
- toks1 = [int(x) for x in re.findall(r'<M(\d+)>', raw1)]
329
- verts1, faces = params_to_vertices(decode_tokens_to_params(toks1))
330
 
331
- # Generate 2
332
- raw2 = generate_motion_tokens(word, pids[1])
333
- toks2 = [int(x) for x in re.findall(r'<M(\d+)>', raw2)]
334
- verts2, _ = params_to_vertices(decode_tokens_to_params(toks2))
335
 
336
- if verts1 is not None and verts2 is not None:
337
- html = create_side_by_side_html(verts1, faces, verts2, faces, title1=f"{pids[0]}", title2=f"{pids[1]}")
338
- elif verts1 is not None:
339
- html = create_single_html(verts1, faces, title=f"{pids[0]}")
340
- else:
341
- return "<div>Error generating motion</div>", ""
 
 
 
 
 
 
 
342
 
343
- iframe = save_and_get_iframe(html, f"{word}")
344
- return iframe, f"[{pids[0]}] {len(toks1)} toks\n[{pids[1]}] {len(toks2)} toks"
 
 
 
 
 
 
 
 
 
345
 
346
  # =====================================================================
347
- # UI
348
  # =====================================================================
349
  def create_ui():
350
- custom_css = ".gradio-container { max-width: 1400px !important; }"
 
351
 
352
- with gr.Blocks(css=custom_css, title="SignMotionGPT") as demo:
353
- gr.Markdown("# SignMotionGPT Comparison Demo")
354
-
 
355
  with gr.Row():
356
  with gr.Column(scale=1):
357
- txt_input = gr.Textbox(label="Word", placeholder="push")
358
- btn = gr.Button("Generate Comparison", variant="primary")
359
- out_toks = gr.Textbox(label="Details", lines=4)
360
 
 
 
 
 
 
 
 
 
 
 
 
361
  with gr.Column(scale=2):
362
- out_html = gr.HTML(label="Visualization")
363
-
364
- btn.click(process_word, inputs=txt_input, outputs=[out_html, out_toks])
365
-
366
- # Initialize
367
- initialize_models()
368
 
369
  return demo
370
 
371
  if __name__ == "__main__":
 
 
 
 
 
 
372
  demo = create_ui()
373
- print(f"🚀 Launching. Output dir: {OUTPUT_DIR}")
374
- # allowed_paths=[OUTPUT_DIR] is the magic key for HF Spaces
375
- demo.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=[OUTPUT_DIR])
 
 
 
 
1
  import os
2
  import sys
3
  import re
4
  import json
5
  import random
6
  import argparse
 
7
  import warnings
8
+ import html as html_module
9
+ import shutil
10
  from pathlib import Path
11
 
12
  import torch
13
  import numpy as np
14
+ from huggingface_hub import hf_hub_download, snapshot_download
15
+
16
+ # Clean imports for Spaces (relies on requirements.txt)
17
  import gradio as gr
18
  import plotly.graph_objects as go
19
  from plotly.subplots import make_subplots
 
24
  warnings.filterwarnings("ignore")
25
 
26
  # =====================================================================
27
+ # Configuration
28
  # =====================================================================
29
+ # The Repo ID where your LLM and auxiliary files (vqvae, dataset) are stored
 
 
 
 
 
 
 
30
  HF_REPO_ID = os.environ.get("HF_REPO_ID", "rdz-falcon/SignMotionGPTfit-archive")
31
  HF_SUBFOLDER = os.environ.get("HF_SUBFOLDER", "stage2_v2/epoch-030")
32
 
33
+ # Spaces run in /home/user/app. We set up paths relative to that.
34
+ WORK_DIR = os.getcwd()
35
+ DATA_DIR = os.path.join(WORK_DIR, "data")
36
+ os.makedirs(DATA_DIR, exist_ok=True)
37
 
38
+ # Path definitions
39
+ DATASET_PATH = os.path.join(WORK_DIR, "enriched_dataset.json")
40
+ VQVAE_CHECKPOINT = os.path.join(DATA_DIR, "vqvae_model.pt")
41
+ STATS_PATH = os.path.join(DATA_DIR, "vqvae_stats.pt")
42
+ SMPLX_MODEL_DIR = os.path.join(DATA_DIR, "smplx_models")
43
 
44
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
45
 
46
+ # Token definitions
47
  M_START = "<M_START>"
48
  M_END = "<M_END>"
49
  PAD_TOKEN = "<PAD>"
50
+
51
+ # Inference settings
52
  INFERENCE_TEMPERATURE = 0.7
53
  INFERENCE_TOP_K = 50
54
  INFERENCE_REPETITION_PENALTY = 1.2
55
 
56
+ # Architecture settings
57
  SMPL_DIM = 182
58
  CODEBOOK_SIZE = 512
59
  CODE_DIM = 512
 
66
  PARAM_NAMES = ["betas", "body_pose", "left_hand_pose", "right_hand_pose",
67
  "trans", "expression", "jaw_pose", "eye_pose"]
68
 
69
+ # =====================================================================
70
+ # Helper: Download Assets from HF Hub
71
+ # =====================================================================
72
def download_artifacts():
    """
    Download missing auxiliary assets (dataset, VQ-VAE checkpoint/stats,
    SMPL-X models) from the Hugging Face Hub repository.

    Best-effort: each asset is attempted independently and failures are
    logged as warnings so the app can still start with partial assets.
    """
    print(f"Checking for artifacts in {HF_REPO_ID}...")
    # HF_TOKEN must be set in the Space settings if the repo is private.
    token = os.environ.get("HF_TOKEN")

    # 1. Dataset (word -> participant mapping).
    if not os.path.exists(DATASET_PATH):
        try:
            print("Downloading dataset...")
            hf_hub_download(repo_id=HF_REPO_ID, filename="enriched_dataset.json",
                            local_dir=WORK_DIR, token=token)
        except Exception as e:
            print(f"Warning: Could not download dataset: {e}")

    # 2. VQ-VAE checkpoint: try the 'data/' subfolder first, then repo root.
    if not os.path.exists(VQVAE_CHECKPOINT):
        try:
            print("Downloading VQVAE model...")
            hf_hub_download(repo_id=HF_REPO_ID, filename="data/vqvae_model.pt",
                            local_dir=WORK_DIR, token=token)
        except Exception as e:
            try:
                hf_hub_download(repo_id=HF_REPO_ID, filename="vqvae_model.pt",
                                local_dir=DATA_DIR, token=token)
            # Narrow catch (was a bare `except:` that would also swallow
            # KeyboardInterrupt/SystemExit); report the original error.
            except Exception:
                print(f"Warning: Could not download VQVAE model: {e}")

    # 3. Normalization stats, same two-location strategy.
    if not os.path.exists(STATS_PATH):
        try:
            print("Downloading VQVAE stats...")
            hf_hub_download(repo_id=HF_REPO_ID, filename="data/vqvae_stats.pt",
                            local_dir=WORK_DIR, token=token)
        except Exception as e:
            try:
                hf_hub_download(repo_id=HF_REPO_ID, filename="vqvae_stats.pt",
                                local_dir=DATA_DIR, token=token)
            except Exception:
                print(f"Warning: Could not download VQVAE stats: {e}")

    # 4. SMPL-X body models. These are separately licensed, so they may be
    # absent from the repo; the user can also upload them manually.
    if not os.path.exists(SMPLX_MODEL_DIR):
        print("Looking for SMPL-X models...")
        try:
            snapshot_download(repo_id=HF_REPO_ID, allow_patterns="smplx_models/*",
                              local_dir=DATA_DIR, token=token)
        except Exception:
            print(f"Warning: Could not download SMPL-X models. Ensure 'smplx_models' folder exists in {DATA_DIR} or repo.")
+
129
+
130
  # =====================================================================
131
  # Import VQ-VAE architecture
132
  # =====================================================================
133
# Ensure the current directory is on sys.path so the 'mGPT' package
# (uploaded alongside app.py in the Space) can be imported.
sys.path.append(os.getcwd())

try:
    # This requires the mGPT folder to be uploaded to the Space
    from mGPT.archs.mgpt_vq import VQVae
except ImportError as e:
    # Degrade gracefully: downstream code checks `VQVae is None` and raises
    # a clear RuntimeError when the architecture is actually needed.
    print(f"Error: Could not import VQVae. Ensure the 'mGPT' folder is uploaded to the Space files. Details: {e}")
    VQVae = None
 
 
 
 
142
 
143
  # =====================================================================
144
  # Global Cache
 
152
  "initialized": False
153
  }
154
 
155
+ _word_pid_map = {}
156
+ _example_cache = {}
157
 
158
  # =====================================================================
159
  # Dataset Loading
 
161
def load_word_pid_mapping():
    """
    Populate the global ``_word_pid_map`` ({word -> sorted list of
    participant ids}) from the enriched dataset JSON, if present.

    A missing file or a parse error is logged and leaves the map unchanged.
    """
    global _word_pid_map
    if not os.path.exists(DATASET_PATH):
        print(f"Dataset not found: {DATASET_PATH}")
        return

    print(f"Loading dataset from: {DATASET_PATH}")
    try:
        with open(DATASET_PATH, 'r', encoding='utf-8') as f:
            data = json.load(f)

        for entry in data:
            # `entry.get('word', '')` returns None (not '') when the key is
            # present with a JSON null, which would crash .lower(); the
            # `or ''` guard covers that. Strip keys so lookups (which
            # lower().strip() the query) match reliably.
            word = (entry.get('word') or '').lower().strip()
            pid = entry.get('participant_id') or ''
            if word and pid:
                _word_pid_map.setdefault(word, set()).add(pid)

        # Freeze each entry into a deterministic, sorted list.
        for word in _word_pid_map:
            _word_pid_map[word] = sorted(_word_pid_map[word])
        print(f"Loaded {len(_word_pid_map)} unique words from dataset")
    except Exception as e:
        print(f"Error loading dataset: {e}")
185
 
186
def get_pids_for_word(word: str) -> list:
    """Return the participant ids recorded for *word* ([] when unknown)."""
    key = word.lower().strip()
    return _word_pid_map.get(key, [])
188
+
189
def get_random_pids_for_word(word: str, count: int = 2) -> list:
    """Return up to *count* participant ids for *word*, sampled at random."""
    candidates = get_pids_for_word(word)
    # Fewer candidates than requested (including none): return them all.
    if len(candidates) <= count:
        return candidates
    return random.sample(candidates, count)
194
 
195
def get_example_words_with_pids(count: int = 3) -> list:
    """
    Pick up to *count* (word, participant_id) example pairs, preferring a
    curated list of common signs and topping up randomly from the dataset.
    """
    preferred = ['push', 'passport', 'library', 'send', 'college', 'help', 'thank', 'hello']
    examples = []
    for candidate in preferred:
        ids = get_pids_for_word(candidate)
        if ids:
            examples.append((candidate, ids[0]))
        if len(examples) >= count:
            break

    # Top up with random dataset words when the curated list falls short.
    shortfall = count - len(examples)
    if shortfall > 0:
        chosen = [w for w, _ in examples]
        pool = [w for w in _word_pid_map.keys() if w not in chosen]
        if pool:
            random.shuffle(pool)
            for candidate in pool[:shortfall]:
                examples.append((candidate, _word_pid_map[candidate][0]))
    return examples
212
+
213
  # =====================================================================
214
+ # VQ-VAE Wrapper
215
  # =====================================================================
216
class MotionGPT_VQVAE_Wrapper(torch.nn.Module):
    """Thin nn.Module wrapper exposing the project VQVae as ``self.vqvae``."""

    def __init__(self, smpl_dim=SMPL_DIM, codebook_size=CODEBOOK_SIZE, code_dim=CODE_DIM, **kwargs):
        super().__init__()
        # Fail fast when the architecture import at module load time failed.
        if VQVae is None:
            raise RuntimeError("VQVae architecture not available")
        self.vqvae = VQVae(
            nfeats=smpl_dim,
            code_num=codebook_size,
            code_dim=code_dim,
            output_emb_width=code_dim,
            **kwargs,
        )
225
+
226
+ # =====================================================================
227
+ # Model Loading
228
+ # =====================================================================
229
def load_llm_model():
    """
    Load the fine-tuned causal LM and its tokenizer from the HF Hub.

    Uses fp16 on GPU and fp32 on CPU, guarantees a pad token exists
    (resizing the embedding matrix to the tokenizer size), and returns
    (model, tokenizer) in eval mode on DEVICE, or (None, None) on failure.
    """
    print(f"Loading LLM from: {HF_REPO_ID}/{HF_SUBFOLDER}")
    # Use environment token if available for private repos
    token = os.environ.get("HF_TOKEN")
    try:
        tokenizer = AutoTokenizer.from_pretrained(HF_REPO_ID, subfolder=HF_SUBFOLDER, trust_remote_code=True, token=token)
        model = AutoModelForCausalLM.from_pretrained(
            HF_REPO_ID, subfolder=HF_SUBFOLDER, trust_remote_code=True,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            token=token
        )
        # The motion vocabulary was added during fine-tuning; ensure a pad
        # token exists and the embedding table matches the tokenizer size.
        if tokenizer.pad_token is None:
            tokenizer.add_special_tokens({"pad_token": PAD_TOKEN})
        model.resize_token_embeddings(len(tokenizer))
        model.config.pad_token_id = tokenizer.pad_token_id
        model.to(DEVICE)
        model.eval()
        print(f"LLM loaded (vocab size: {len(tokenizer)})")
        return model, tokenizer
    except Exception as e:
        print(f"Error loading LLM: {e}")
        return None, None
251
+
252
def load_vqvae_model():
    """
    Build the VQ-VAE wrapper and load its checkpoint from VQVAE_CHECKPOINT.

    Returns the model in eval mode on DEVICE, or None when the checkpoint
    is missing or loading fails.
    """
    if not os.path.exists(VQVAE_CHECKPOINT):
        print(f"VQ-VAE checkpoint not found at {VQVAE_CHECKPOINT}")
        return None
    print(f"Loading VQ-VAE from: {VQVAE_CHECKPOINT}")
    try:
        model = MotionGPT_VQVAE_Wrapper(smpl_dim=SMPL_DIM, codebook_size=CODEBOOK_SIZE, code_dim=CODE_DIM, **VQ_ARGS).to(DEVICE)
        # NOTE(review): torch >= 2.6 defaults torch.load to weights_only=True,
        # which rejects pickled checkpoints containing arbitrary objects —
        # confirm and pass weights_only=False if loading starts failing.
        ckpt = torch.load(VQVAE_CHECKPOINT, map_location=DEVICE)  # Removed weights_only=False for compatibility, add back if torch version requires
        # Checkpoints may store either a raw state dict or a training dict.
        state_dict = ckpt.get('model_state_dict', ckpt)
        # strict=False tolerates key drift between training and inference code.
        model.load_state_dict(state_dict, strict=False)
        model.eval()
        return model
    except Exception as e:
        print(f"Error loading VQVAE: {e}")
        return None
267
+
268
def load_stats():
    """
    Load the normalization mean/std pair from STATS_PATH.

    Tensors are converted to numpy since de-normalization happens in numpy.
    Returns (None, None) when the file is missing or unreadable.
    """
    if not os.path.exists(STATS_PATH):
        return None, None
    try:
        stats = torch.load(STATS_PATH, map_location='cpu')
        mean = stats.get('mean', 0)
        std = stats.get('std', 1)
        if torch.is_tensor(mean):
            mean = mean.cpu().numpy()
        if torch.is_tensor(std):
            std = std.cpu().numpy()
        return mean, std
    except Exception as e:
        print(f"Error loading stats: {e}")
        return None, None
280
+
281
def load_smplx_model():
    """
    Instantiate a neutral-gender SMPL-X body model from SMPLX_MODEL_DIR.

    All parameter groups are created (pose, betas, expression, hands, etc.)
    with use_pca=False so full hand poses can be fed directly. Returns the
    model on DEVICE, or None when the directory is missing or loading fails.
    """
    if not os.path.exists(SMPLX_MODEL_DIR):
        print(f"SMPL-X directory not found: {SMPLX_MODEL_DIR}")
        return None
    print(f"Loading SMPL-X from: {SMPLX_MODEL_DIR}")
    try:
        model = smplx.SMPLX(
            model_path=SMPLX_MODEL_DIR, model_type='smplx', gender='neutral', use_pca=False,
            create_global_orient=True, create_body_pose=True, create_betas=True,
            create_expression=True, create_jaw_pose=True, create_left_hand_pose=True,
            create_right_hand_pose=True, create_transl=True
        ).to(DEVICE)
        return model
    except Exception as e:
        print(f"Error loading SMPL-X: {e}")
        return None
297
 
298
def initialize_models():
    """
    One-time setup: fetch artifacts, load the dataset mapping, and populate
    ``_model_cache`` with the LLM, VQ-VAE, stats, and SMPL-X model.

    Idempotent — subsequent calls return immediately once initialized.
    Individual loaders may return None; downstream code checks for that.
    """
    global _model_cache
    if _model_cache["initialized"]: return

    print("Initializing Models...")
    # Download assets first
    download_artifacts()

    load_word_pid_mapping()
    _model_cache["llm_model"], _model_cache["llm_tokenizer"] = load_llm_model()
    _model_cache["vqvae_model"] = load_vqvae_model()
    _model_cache["stats"] = load_stats()
    _model_cache["smplx_model"] = load_smplx_model()

    _model_cache["initialized"] = True
    print("Initialization complete.")
314
+
315
def precompute_examples():
    """
    Pre-render a few example animations into ``_example_cache`` so the UI
    can show them instantly. No-op when models are not yet initialized.
    """
    global _example_cache
    if not _model_cache["initialized"]: return

    examples = get_example_words_with_pids(3)
    if not examples: return

    print(f"Pre-computing {len(examples)} examples...")
    for word, pid in examples:
        key = f"{word}_{pid}"
        try:
            # NOTE(review): generate_animation_for_word is not defined in this
            # portion of the file — presumably declared further down; confirm.
            html, tokens = generate_animation_for_word(word, pid, upper_body_only=True)
            _example_cache[key] = {"html": html, "tokens": tokens, "word": word, "pid": pid}
        except Exception as e:
            print(f"Failed pre-compute {word}: {e}")
330
 
331
  # =====================================================================
332
+ # Motion Generation & Visualization Logic (Kept largely the same)
333
  # =====================================================================
334
def generate_motion_tokens(word: str, variant: str) -> str:
    """
    Sample motion-token text from the LLM for *word* and participant
    *variant*.

    Returns the text after the "Motion: " marker (or the whole decode when
    the marker is absent), or an error string when no LLM is loaded.
    """
    llm = _model_cache["llm_model"]
    tok = _model_cache["llm_tokenizer"]
    if llm is None:
        return "Error: LLM not loaded."

    prompt = f"Instruction: Generate motion for word '{word}' with variant '{variant}'.\nMotion: "
    encoded = tok(prompt, return_tensors="pt").to(DEVICE)

    sampling = dict(
        max_new_tokens=100,
        do_sample=True,
        temperature=INFERENCE_TEMPERATURE,
        top_k=INFERENCE_TOP_K,
        repetition_penalty=INFERENCE_REPETITION_PENALTY,
        pad_token_id=tok.pad_token_id,
        # Stop cleanly at the motion-end marker.
        eos_token_id=tok.convert_tokens_to_ids(M_END),
        early_stopping=True,
    )
    with torch.no_grad():
        generated = llm.generate(**encoded, **sampling)

    text = tok.decode(generated[0], skip_special_tokens=False)
    if "Motion: " in text:
        text = text.split("Motion: ")[-1]
    return text.strip()
354
+
355
def parse_motion_tokens(token_str: str) -> list:
    """
    Extract motion-token ids from generated text.

    Tries the ``<M12>`` format first, then the ``<motion_12>`` fallback.
    Returns a list of ints, or [] for non-strings / no matches.
    """
    if not isinstance(token_str, str):
        return []
    for pattern in (r'<M(\d+)>', r'<motion_(\d+)>'):
        found = re.findall(pattern, token_str)
        if found:
            return [int(num) for num in found]
    return []
361
 
362
def decode_tokens_to_params(tokens: list) -> np.ndarray:
    """
    Decode discrete motion token ids back into SMPL-X parameter frames.

    Looks the ids up in the VQ-VAE codebook, runs the decoder, and
    de-normalizes with the dataset mean/std when available. Returns an
    array of shape (T, SMPL_DIM), or an empty (0, SMPL_DIM) array when the
    VQ-VAE is unavailable, *tokens* is empty, or the quantizer has no
    ``codebook`` attribute.
    """
    vqvae_model = _model_cache["vqvae_model"]
    mean, std = _model_cache["stats"]
    if vqvae_model is None or not tokens: return np.zeros((0, SMPL_DIM), dtype=np.float32)

    idx = torch.tensor(tokens, dtype=torch.long, device=DEVICE).unsqueeze(0)
    with torch.no_grad():
        quantizer = vqvae_model.vqvae.quantizer
        if hasattr(quantizer, "codebook"):
            codebook = quantizer.codebook.to(DEVICE)
            emb = codebook[idx]
            # Decoder expects channels-first input: (B, C, T).
            x_quantized = emb.permute(0, 2, 1).contiguous()
        else:
            # Fallback if specific quantizer logic fails
            return np.zeros((0, SMPL_DIM), dtype=np.float32)

        x_dec = vqvae_model.vqvae.decoder(x_quantized)
        smpl_out = vqvae_model.vqvae.postprocess(x_dec)
        params_np = smpl_out.squeeze(0).cpu().numpy()

    # Undo training-time normalization: x * std + mean.
    if mean is not None and std is not None:
        params_np = (params_np * np.array(std).reshape(1, -1)) + np.array(mean).reshape(1, -1)
    return params_np
385
+
386
def params_to_vertices(params_seq: np.ndarray) -> tuple:
    """
    Run SMPL-X forward over a (T, SMPL_DIM) parameter sequence.

    Returns (vertices, faces) with vertices of shape (T, V, 3) and faces as
    int32 triangle indices, or (None, None) when the SMPL-X model is
    missing or a forward pass fails.
    """
    smplx_model = _model_cache["smplx_model"]
    if smplx_model is None: return None, None

    # Column offsets for slicing the flat parameter vector (PARAM_DIMS order).
    starts = np.cumsum([0] + PARAM_DIMS[:-1])
    ends = starts + np.array(PARAM_DIMS)
    T = params_seq.shape[0]
    all_verts = []

    # Process in chunks to avoid memory issues on CPU spaces
    batch_size = 10

    with torch.no_grad():
        for s in range(0, T, batch_size):
            batch = params_seq[s:s+batch_size]
            np_parts = {name: batch[:, st:ed].astype(np.float32) for name, st, ed in zip(PARAM_NAMES, starts, ends)}
            tensor_parts = {name: torch.from_numpy(arr).to(DEVICE) for name, arr in np_parts.items()}

            # Simple handling for body pose/orient split
            body_t = tensor_parts['body_pose']
            # Assumption: Model output matches SMPL-X expectations.
            # Simplified logic for demo stability:
            global_orient = body_t[:, :3].contiguous()
            # NOTE(review): 'body_pose' is 63 wide per PARAM_DIMS, so [:, 3:66]
            # yields only 60 values — confirm the intended orient/pose split
            # against the training pipeline.
            body_pose_only = body_t[:, 3:66].contiguous() # Trim to standard 63 if needed, or keep dynamic

            try:
                out = smplx_model(
                    betas=tensor_parts['betas'], global_orient=global_orient, body_pose=body_pose_only,
                    left_hand_pose=tensor_parts['left_hand_pose'], right_hand_pose=tensor_parts['right_hand_pose'],
                    expression=tensor_parts['expression'], jaw_pose=tensor_parts['jaw_pose'],
                    # Same 3-dim 'eye_pose' block drives both eyes.
                    leye_pose=tensor_parts['eye_pose'], reye_pose=tensor_parts['eye_pose'],
                    transl=tensor_parts['trans'], return_verts=True
                )
                all_verts.append(out.vertices.detach().cpu().numpy())
            except Exception as e:
                print(f"SMPL-X Forward pass error: {e}")
                return None, None

    if not all_verts: return None, None
    return np.concatenate(all_verts, axis=0), smplx_model.faces.astype(np.int32)
426
+
427
def compute_upper_body_bounds(verts):
    """
    Compute axis ranges framing the upper body of the first frame.

    The waist is estimated at 45% of body height (y axis); x/z ranges get
    0.2 margins and the top gets 0.1 so the mesh clears the viewport edges.
    Returns a dict with 'x_range'/'y_range'/'z_range'/'center', or None
    when *verts* is None.
    """
    if verts is None:
        return None
    frame0 = verts[0]
    xs, ys, zs = frame0[:, 0], frame0[:, 1], frame0[:, 2]
    x_lo, x_hi = xs.min(), xs.max()
    y_lo, y_hi = ys.min(), ys.max()
    z_lo, z_hi = zs.min(), zs.max()
    waist = y_lo + (y_hi - y_lo) * 0.45
    return {
        'y_range': [waist, y_hi + 0.1],
        'x_range': [x_lo - 0.2, x_hi + 0.2],
        'z_range': [z_lo - 0.2, z_hi + 0.2],
        'center': [(x_lo + x_hi) / 2, (waist + y_hi) / 2, (z_lo + z_hi) / 2],
    }
 
444
  # =====================================================================
445
+ # HTML Generation
446
  # =====================================================================
447
def create_animation_html(verts, faces, upper_body_only=True, title=""):
    """Build a standalone Plotly HTML page animating one vertex sequence.

    When upper_body_only is True the camera is framed on the upper body
    using compute_upper_body_bounds; otherwise the full mesh is shown.
    """
    if verts is None:
        return create_error_html("Model generation failed.")

    num_frames = verts.shape[0]
    i, j, k = faces.T.tolist()
    bounds = compute_upper_body_bounds(verts) if upper_body_only else None

    initial_mesh = go.Mesh3d(
        x=verts[0, :, 0], y=verts[0, :, 1], z=verts[0, :, 2],
        i=i, j=j, k=k, color='#6FA8DC', opacity=0.8, flatshading=True,
    )

    frames = [
        go.Frame(
            data=[go.Mesh3d(x=verts[t, :, 0], y=verts[t, :, 1], z=verts[t, :, 2], i=i, j=j, k=k)],
            name=str(t),
        )
        for t in range(num_frames)
    ]

    # Hide all axes; optionally zoom onto the upper-body bounding box.
    scene_cfg = dict(aspectmode='data', xaxis=dict(visible=False),
                     yaxis=dict(visible=False), zaxis=dict(visible=False))
    if bounds:
        scene_cfg.update(
            xaxis=dict(range=bounds['x_range'], visible=False),
            yaxis=dict(range=bounds['y_range'], visible=False),
            zaxis=dict(range=bounds['z_range'], visible=False),
            aspectmode='manual', aspectratio=dict(x=1, y=1, z=1),
            camera=dict(eye=dict(x=0, y=0.5, z=2.0)),
        )

    fig = go.Figure(data=[initial_mesh], frames=frames)
    fig.update_layout(
        title=title, scene=scene_cfg, height=500, margin=dict(l=0, r=0, t=30, b=0),
        updatemenus=[dict(type="buttons", buttons=[dict(
            label="Play", method="animate", args=[None, {"frame": {"duration": 50}}])])],
    )
    return fig.to_html(include_plotlyjs='cdn', full_html=True)
def create_side_by_side_html(verts1, faces1, verts2, faces2, title1="", title2=""):
    """Build a Plotly HTML page animating two meshes side by side.

    Both sequences are truncated to the shorter length so the frames stay
    in sync. Returns an error panel when either mesh is missing.
    """
    if verts1 is None or verts2 is None:
        return create_error_html("One or both models failed.")

    T = min(verts1.shape[0], verts2.shape[0])
    verts1, verts2 = verts1[:T], verts2[:T]
    i1, j1, k1 = faces1.T.tolist()
    i2, j2, k2 = faces2.T.tolist()

    fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'scene'}, {'type': 'scene'}]],
                        subplot_titles=[title1, title2])

    # BUG FIX: the Mesh3d face-index keyword is `k`; the previous
    # `k1=k1` / `k2=k2` passed invalid properties and made Plotly raise,
    # so the comparison view never rendered.
    fig.add_trace(go.Mesh3d(x=verts1[0, :, 0], y=verts1[0, :, 1], z=verts1[0, :, 2],
                            i=i1, j=j1, k=k1, color='#6FA8DC'), row=1, col=1)
    fig.add_trace(go.Mesh3d(x=verts2[0, :, 0], y=verts2[0, :, 1], z=verts2[0, :, 2],
                            i=i2, j=j2, k=k2, color='#93C47D'), row=1, col=2)

    frames = []
    for t in range(T):
        frames.append(go.Frame(data=[
            go.Mesh3d(x=verts1[t, :, 0], y=verts1[t, :, 1], z=verts1[t, :, 2], i=i1, j=j1, k=k1),
            go.Mesh3d(x=verts2[t, :, 0], y=verts2[t, :, 1], z=verts2[t, :, 2], i=i2, j=j2, k=k2)
        ], name=str(t)))
    fig.frames = frames

    # Identical camera in both scenes so the motions are directly comparable.
    cam = dict(eye=dict(x=0, y=0, z=2.2), up=dict(x=0, y=1, z=0))
    fig.update_layout(
        scene=dict(xaxis=dict(visible=False), yaxis=dict(visible=False), zaxis=dict(visible=False), camera=cam, aspectmode='data'),
        scene2=dict(xaxis=dict(visible=False), yaxis=dict(visible=False), zaxis=dict(visible=False), camera=cam, aspectmode='data'),
        height=500, margin=dict(l=0, r=0, t=30, b=0),
        updatemenus=[dict(type="buttons", buttons=[dict(label="Play", method="animate", args=[None, {"frame": {"duration": 50}}])])]
    )
    return fig.to_html(include_plotlyjs='cdn', full_html=True)
def create_iframe_html(html_content):
    """Wrap a full HTML document in a sandboxed srcdoc iframe for Gradio."""
    # Escaping (including quotes) keeps the document valid inside the
    # srcdoc attribute value.
    safe_doc = html_module.escape(html_content)
    return f'<iframe srcdoc="{safe_doc}" style="width: 100%; height: 520px; border: none;"></iframe>'
def create_error_html(msg):
    """Render *msg* as a centered panel for display in the iframe."""
    return '<div style="text-align:center; padding:50px;">{}</div>'.format(msg)
def create_placeholder_html():
    """Static panel shown before any word has been generated."""
    return ('<div style="text-align:center; padding:50px; color:#666;">'
            'Enter a word to generate animation</div>')
# =====================================================================
# Main Generators
# =====================================================================
def generate_verts_for_word(word, pid):
    """Generate mesh vertices for one (word, performer-id) pair.

    Returns (verts, faces, raw_token_string); verts/faces are None when
    the language model produced no parseable motion tokens.
    """
    token_str = generate_motion_tokens(word, pid)
    motion_ids = parse_motion_tokens(token_str)
    if not motion_ids:
        return None, None, token_str
    motion_params = decode_tokens_to_params(motion_ids)
    verts, faces = params_to_vertices(motion_params)
    return verts, faces, token_str
def generate_animation_for_word(word, pid, upper_body_only=True):
    """Generate a single-view animation HTML plus the raw token string."""
    verts, faces, token_str = generate_verts_for_word(word, pid)
    page = create_animation_html(verts, faces, upper_body_only, title=pid)
    return page, token_str
def process_word(word):
    """Gradio handler: generate two renditions of *word* and compare them.

    Returns (iframe_html, token_text). Degrades to a single view when one
    generation fails, and to an error panel when both do.
    """
    if not _model_cache["initialized"]:
        initialize_models()

    word = word.strip().lower()
    pids = get_random_pids_for_word(word, 2)

    if not pids:
        return create_iframe_html(create_error_html(f"Word '{word}' not found in dataset.")), ""

    # Duplicate the performer when only one rendition exists.
    if len(pids) == 1:
        pids = [pids[0], pids[0]]

    try:
        verts1, faces1, tok1 = generate_verts_for_word(word, pids[0])
        verts2, faces2, tok2 = generate_verts_for_word(word, pids[1])

        if verts1 is None and verts2 is None:
            return create_iframe_html(create_error_html("Motion generation failed.")), f"{tok1}\n{tok2}"

        # One side failed: fall back to a single animation.
        if verts1 is None:
            return create_iframe_html(create_animation_html(verts2, faces2, title=pids[1])), tok2
        if verts2 is None:
            return create_iframe_html(create_animation_html(verts1, faces1, title=pids[0])), tok1

        combined = create_side_by_side_html(verts1, faces1, verts2, faces2,
                                            title1=pids[0], title2=pids[1])
        return create_iframe_html(combined), f"[{pids[0]}] {tok1}\n\n[{pids[1]}] {tok2}"

    except Exception as e:
        # UI boundary: surface any unexpected failure inside the panel.
        return create_iframe_html(create_error_html(f"Error: {str(e)}")), ""
def get_example(word, pid):
    """Return a precomputed example animation, generating it on cache miss."""
    if not _model_cache["initialized"]:
        initialize_models()
    cached = _example_cache.get(f"{word}_{pid}")
    if cached is not None:
        return create_iframe_html(cached["html"]), cached["tokens"]
    # Cache miss: generate the animation on the fly.
    page, token_str = generate_animation_for_word(word, pid)
    return create_iframe_html(page), token_str
# =====================================================================
# App Launch
# =====================================================================
def create_ui():
    """Build the Gradio Blocks UI.

    Fix: the example buttons previously wired their click handlers to a
    freshly constructed, orphan gr.HTML() component instead of the visible
    animation panel (which is created later in the layout), so clicking an
    example never updated the UI. Buttons are now created inside the layout
    first and wired to the real outputs once `html_out` exists.
    """
    initialize_models()
    precompute_examples()

    with gr.Blocks(title="SignMotionGPT", theme=gr.themes.Default()) as demo:
        gr.Markdown("# SignMotionGPT Demo")
        gr.Markdown("Input a word to generate sign language motion.")

        example_buttons = []  # (button, word, pid) tuples, wired below
        with gr.Row():
            with gr.Column(scale=1):
                txt_input = gr.Textbox(label="Word", placeholder="e.g. hello, help, computer")
                btn = gr.Button("Generate", variant="primary")
                txt_out = gr.Textbox(label="Generated Tokens", lines=5)

                if _example_cache:
                    gr.Markdown("### Examples")
                    for entry in _example_cache.values():
                        button = gr.Button(f"{entry['word']} ({entry['pid']})")
                        example_buttons.append((button, entry['word'], entry['pid']))

            with gr.Column(scale=2):
                html_out = gr.HTML(label="Visual", value=create_iframe_html(create_placeholder_html()))

        # Wire events only after html_out exists so every handler targets
        # the on-screen components. Lambda defaults bind per-iteration,
        # avoiding the late-binding closure pitfall.
        for button, word, pid in example_buttons:
            button.click(fn=lambda w=word, p=pid: get_example(w, p),
                         outputs=[html_out, txt_out])

        btn.click(process_word, inputs=[txt_input], outputs=[html_out, txt_out])
        txt_input.submit(process_word, inputs=[txt_input], outputs=[html_out, txt_out])

    return demo
if __name__ == "__main__":
    # Warm up at startup so missing assets surface immediately, but never
    # prevent the UI itself from launching.
    try:
        initialize_models()
    except Exception as e:
        print(f"Startup initialization warning: {e}")

    demo = create_ui()
    # Hugging Face Spaces expects a plain .launch() with no arguments.
    demo.launch()