rdz-falcon committed on
Commit
cfc32bc
·
verified ·
1 Parent(s): 0ea30b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +617 -317
app.py CHANGED
@@ -1,37 +1,24 @@
 
 
 
 
1
  import os
2
  import sys
3
  import re
4
  import json
5
  import random
6
- import argparse
7
  import warnings
8
  import html as html_module
9
- import shutil
10
- from pathlib import Path
11
 
12
  import torch
13
  import numpy as np
14
- from huggingface_hub import hf_hub_download, snapshot_download
15
-
16
- # Clean imports for Spaces (relies on requirements.txt)
17
- import gradio as gr
18
- import plotly.graph_objects as go
19
- from plotly.subplots import make_subplots
20
- import smplx
21
-
22
- from transformers import AutoModelForCausalLM, AutoTokenizer
23
 
24
  warnings.filterwarnings("ignore")
25
 
26
  # =====================================================================
27
- # Configuration
28
  # =====================================================================
29
- # The Repo ID where your LLM and auxiliary files (vqvae, dataset) are stored
30
- HF_REPO_ID = os.environ.get("HF_REPO_ID", "rdz-falcon/SignMotionGPTfit-archive")
31
- HF_SUBFOLDER = os.environ.get("HF_SUBFOLDER", "stage2_v2/epoch-030")
32
-
33
- # Spaces run in /home/user/app. We set up paths relative to that.
34
- WORK_DIR = os.getcwd()
35
  DATA_DIR = os.path.join(WORK_DIR, "data")
36
  os.makedirs(DATA_DIR, exist_ok=True)
37
 
@@ -41,19 +28,21 @@ VQVAE_CHECKPOINT = os.path.join(DATA_DIR, "vqvae_model.pt")
41
  STATS_PATH = os.path.join(DATA_DIR, "vqvae_stats.pt")
42
  SMPLX_MODEL_DIR = os.path.join(DATA_DIR, "smplx_models")
43
 
 
 
 
 
44
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
45
 
46
- # Token definitions
47
  M_START = "<M_START>"
48
  M_END = "<M_END>"
49
  PAD_TOKEN = "<PAD>"
50
-
51
- # Inference settings
52
  INFERENCE_TEMPERATURE = 0.7
53
  INFERENCE_TOP_K = 50
54
  INFERENCE_REPETITION_PENALTY = 1.2
55
 
56
- # Architecture settings
57
  SMPL_DIM = 182
58
  CODEBOOK_SIZE = 512
59
  CODE_DIM = 512
@@ -67,77 +56,45 @@ PARAM_NAMES = ["betas", "body_pose", "left_hand_pose", "right_hand_pose",
67
  "trans", "expression", "jaw_pose", "eye_pose"]
68
 
69
  # =====================================================================
70
- # Helper: Download Assets from HF Hub
71
  # =====================================================================
72
- def download_artifacts():
73
- """
74
- Attempts to download missing auxiliary files (VQVAE, Stats, Dataset, SMPLX)
75
- from the Hugging Face Hub Repository if they don't exist locally.
76
- """
77
- print(f"Checking for artifacts in {HF_REPO_ID}...")
78
- token = os.environ.get("HF_TOKEN") # Ensure this is set in Space Settings if repo is private
79
 
80
- # 1. Download Dataset
81
- if not os.path.exists(DATASET_PATH):
82
- try:
83
- print("Downloading dataset...")
84
- hf_hub_download(repo_id=HF_REPO_ID, filename="enriched_dataset.json",
85
- local_dir=WORK_DIR, token=token)
86
- except Exception as e:
87
- print(f"Warning: Could not download dataset: {e}")
88
 
89
- # 2. Download VQVAE Model
90
- if not os.path.exists(VQVAE_CHECKPOINT):
91
- try:
92
- print("Downloading VQVAE model...")
93
- # Assuming these are in a 'data' folder in your repo, or root. Adjust filename path as needed.
94
- hf_hub_download(repo_id=HF_REPO_ID, filename="data/vqvae_model.pt",
95
- local_dir=WORK_DIR, token=token)
96
- except Exception as e:
97
- # Fallback try root
98
- try:
99
- hf_hub_download(repo_id=HF_REPO_ID, filename="vqvae_model.pt",
100
- local_dir=DATA_DIR, token=token)
101
- except:
102
- print(f"Warning: Could not download VQVAE model: {e}")
103
-
104
- # 3. Download Stats
105
- if not os.path.exists(STATS_PATH):
106
- try:
107
- print("Downloading VQVAE stats...")
108
- hf_hub_download(repo_id=HF_REPO_ID, filename="data/vqvae_stats.pt",
109
- local_dir=WORK_DIR, token=token)
110
- except Exception as e:
111
- try:
112
- hf_hub_download(repo_id=HF_REPO_ID, filename="vqvae_stats.pt",
113
- local_dir=DATA_DIR, token=token)
114
- except:
115
- print(f"Warning: Could not download VQVAE stats: {e}")
116
-
117
- # 4. SMPLX Models
118
- # Note: SMPLX models are licensed. If you can't host them, users must upload them.
119
- # If they are in your repo (e.g. inside a zip or folder), download them here.
120
- if not os.path.exists(SMPLX_MODEL_DIR):
121
- print("Looking for SMPL-X models...")
122
- try:
123
- # Attempt to download a folder if it exists in the repo
124
- snapshot_download(repo_id=HF_REPO_ID, allow_patterns="smplx_models/*",
125
- local_dir=DATA_DIR, token=token)
126
- except Exception as e:
127
- print(f"Warning: Could not download SMPL-X models. Ensure 'smplx_models' folder exists in {DATA_DIR} or repo.")
128
 
 
129
 
130
  # =====================================================================
131
  # Import VQ-VAE architecture
132
  # =====================================================================
133
- # Ensure current directory is in path so mGPT import works
134
- sys.path.append(os.getcwd())
 
 
 
 
 
135
 
136
  try:
137
- # This requires the mGPT folder to be uploaded to the Space
138
  from mGPT.archs.mgpt_vq import VQVae
139
  except ImportError as e:
140
- print(f"Error: Could not import VQVae. Ensure the 'mGPT' folder is uploaded to the Space files. Details: {e}")
141
  VQVae = None
142
 
143
  # =====================================================================
@@ -152,14 +109,16 @@ _model_cache = {
152
  "initialized": False
153
  }
154
 
155
- _word_pid_map = {}
156
- _example_cache = {}
157
 
158
  # =====================================================================
159
- # Dataset Loading
160
  # =====================================================================
161
  def load_word_pid_mapping():
 
162
  global _word_pid_map
 
163
  if not os.path.exists(DATASET_PATH):
164
  print(f"Dataset not found: {DATASET_PATH}")
165
  return
@@ -177,37 +136,50 @@ def load_word_pid_mapping():
177
  _word_pid_map[word] = set()
178
  _word_pid_map[word].add(pid)
179
 
 
180
  for word in _word_pid_map:
181
  _word_pid_map[word] = sorted(list(_word_pid_map[word]))
 
182
  print(f"Loaded {len(_word_pid_map)} unique words from dataset")
183
  except Exception as e:
184
  print(f"Error loading dataset: {e}")
185
 
 
186
  def get_pids_for_word(word: str) -> list:
187
- return _word_pid_map.get(word.lower().strip(), [])
 
 
 
188
 
189
  def get_random_pids_for_word(word: str, count: int = 2) -> list:
 
190
  pids = get_pids_for_word(word)
191
- if not pids: return []
192
- if len(pids) <= count: return pids
 
 
193
  return random.sample(pids, count)
194
 
 
195
  def get_example_words_with_pids(count: int = 3) -> list:
 
196
  examples = []
197
  preferred = ['push', 'passport', 'library', 'send', 'college', 'help', 'thank', 'hello']
 
198
  for word in preferred:
199
  pids = get_pids_for_word(word)
200
  if pids:
201
  examples.append((word, pids[0]))
202
- if len(examples) >= count: break
 
203
 
204
  if len(examples) < count:
205
  available = [w for w in _word_pid_map.keys() if w not in [e[0] for e in examples]]
206
- if available:
207
- random.shuffle(available)
208
- for word in available[:count - len(examples)]:
209
- pids = _word_pid_map[word]
210
- examples.append((word, pids[0]))
211
  return examples
212
 
213
  # =====================================================================
@@ -224,117 +196,127 @@ class MotionGPT_VQVAE_Wrapper(torch.nn.Module):
224
  )
225
 
226
  # =====================================================================
227
- # Model Loading
228
  # =====================================================================
229
  def load_llm_model():
230
  print(f"Loading LLM from: {HF_REPO_ID}/{HF_SUBFOLDER}")
231
- # Use environment token if available for private repos
232
- token = os.environ.get("HF_TOKEN")
233
- try:
234
- tokenizer = AutoTokenizer.from_pretrained(HF_REPO_ID, subfolder=HF_SUBFOLDER, trust_remote_code=True, token=token)
235
- model = AutoModelForCausalLM.from_pretrained(
236
- HF_REPO_ID, subfolder=HF_SUBFOLDER, trust_remote_code=True,
237
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
238
- token=token
239
- )
240
- if tokenizer.pad_token is None:
241
- tokenizer.add_special_tokens({"pad_token": PAD_TOKEN})
242
- model.resize_token_embeddings(len(tokenizer))
243
- model.config.pad_token_id = tokenizer.pad_token_id
244
- model.to(DEVICE)
245
- model.eval()
246
- print(f"LLM loaded (vocab size: {len(tokenizer)})")
247
- return model, tokenizer
248
- except Exception as e:
249
- print(f"Error loading LLM: {e}")
250
- return None, None
251
 
252
  def load_vqvae_model():
253
  if not os.path.exists(VQVAE_CHECKPOINT):
254
- print(f"VQ-VAE checkpoint not found at {VQVAE_CHECKPOINT}")
255
  return None
256
  print(f"Loading VQ-VAE from: {VQVAE_CHECKPOINT}")
257
- try:
258
- model = MotionGPT_VQVAE_Wrapper(smpl_dim=SMPL_DIM, codebook_size=CODEBOOK_SIZE, code_dim=CODE_DIM, **VQ_ARGS).to(DEVICE)
259
- ckpt = torch.load(VQVAE_CHECKPOINT, map_location=DEVICE, weights_only=False) # Removed weights_only=False for compatibility, add back if torch version requires
260
- state_dict = ckpt.get('model_state_dict', ckpt)
261
- model.load_state_dict(state_dict, strict=False)
262
- model.eval()
263
- return model
264
- except Exception as e:
265
- print(f"Error loading VQVAE: {e}")
266
- return None
267
 
268
  def load_stats():
269
  if not os.path.exists(STATS_PATH):
270
  return None, None
271
- try:
272
- st = torch.load(STATS_PATH, map_location='cpu')
273
- mean, std = st.get('mean', 0), st.get('std', 1)
274
- if torch.is_tensor(mean): mean = mean.cpu().numpy()
275
- if torch.is_tensor(std): std = std.cpu().numpy()
276
- return mean, std
277
- except Exception as e:
278
- print(f"Error loading stats: {e}")
279
- return None, None
280
 
281
  def load_smplx_model():
282
  if not os.path.exists(SMPLX_MODEL_DIR):
283
  print(f"SMPL-X directory not found: {SMPLX_MODEL_DIR}")
284
  return None
285
  print(f"Loading SMPL-X from: {SMPLX_MODEL_DIR}")
286
- try:
287
- model = smplx.SMPLX(
288
- model_path=SMPLX_MODEL_DIR, model_type='smplx', gender='neutral', use_pca=False,
289
- create_global_orient=True, create_body_pose=True, create_betas=True,
290
- create_expression=True, create_jaw_pose=True, create_left_hand_pose=True,
291
- create_right_hand_pose=True, create_transl=True
292
- ).to(DEVICE)
293
- return model
294
- except Exception as e:
295
- print(f"Error loading SMPL-X: {e}")
296
- return None
297
 
298
  def initialize_models():
299
  global _model_cache
300
- if _model_cache["initialized"]: return
 
301
 
302
- print("Initializing Models...")
303
- # Download assets first
304
- download_artifacts()
305
 
 
306
  load_word_pid_mapping()
 
307
  _model_cache["llm_model"], _model_cache["llm_tokenizer"] = load_llm_model()
308
- _model_cache["vqvae_model"] = load_vqvae_model()
309
- _model_cache["stats"] = load_stats()
310
- _model_cache["smplx_model"] = load_smplx_model()
 
 
 
 
311
 
312
  _model_cache["initialized"] = True
313
- print("Initialization complete.")
 
 
314
 
315
  def precompute_examples():
 
316
  global _example_cache
317
- if not _model_cache["initialized"]: return
 
 
318
 
319
  examples = get_example_words_with_pids(3)
320
- if not examples: return
321
-
322
- print(f"Pre-computing {len(examples)} examples...")
323
  for word, pid in examples:
324
  key = f"{word}_{pid}"
 
325
  try:
326
  html, tokens = generate_animation_for_word(word, pid, upper_body_only=True)
327
  _example_cache[key] = {"html": html, "tokens": tokens, "word": word, "pid": pid}
 
328
  except Exception as e:
329
- print(f"Failed pre-compute {word}: {e}")
 
 
 
330
 
331
  # =====================================================================
332
- # Motion Generation & Visualization Logic (Kept largely the same)
333
  # =====================================================================
334
  def generate_motion_tokens(word: str, variant: str) -> str:
335
  model = _model_cache["llm_model"]
336
  tokenizer = _model_cache["llm_tokenizer"]
337
- if model is None: return "Error: LLM not loaded."
 
 
338
 
339
  prompt = f"Instruction: Generate motion for word '{word}' with variant '{variant}'.\nMotion: "
340
  inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
@@ -348,269 +330,587 @@ def generate_motion_tokens(word: str, variant: str) -> str:
348
  eos_token_id=tokenizer.convert_tokens_to_ids(M_END),
349
  early_stopping=True
350
  )
 
351
  decoded = tokenizer.decode(output[0], skip_special_tokens=False)
352
  motion_part = decoded.split("Motion: ")[-1] if "Motion: " in decoded else decoded
353
  return motion_part.strip()
354
 
 
355
  def parse_motion_tokens(token_str: str) -> list:
356
- if isinstance(token_str, str):
357
- matches = re.findall(r'<M(\d+)>', token_str)
358
- if not matches: matches = re.findall(r'<motion_(\d+)>', token_str)
359
- if matches: return [int(x) for x in matches]
 
 
 
 
 
 
 
 
 
360
  return []
361
 
 
362
  def decode_tokens_to_params(tokens: list) -> np.ndarray:
363
  vqvae_model = _model_cache["vqvae_model"]
364
  mean, std = _model_cache["stats"]
365
- if vqvae_model is None or not tokens: return np.zeros((0, SMPL_DIM), dtype=np.float32)
 
 
366
 
367
  idx = torch.tensor(tokens, dtype=torch.long, device=DEVICE).unsqueeze(0)
368
- with torch.no_grad():
369
- quantizer = vqvae_model.vqvae.quantizer
370
- if hasattr(quantizer, "codebook"):
371
- codebook = quantizer.codebook.to(DEVICE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
  emb = codebook[idx]
373
  x_quantized = emb.permute(0, 2, 1).contiguous()
374
- else:
375
- # Fallback if specific quantizer logic fails
376
- return np.zeros((0, SMPL_DIM), dtype=np.float32)
377
-
378
  x_dec = vqvae_model.vqvae.decoder(x_quantized)
379
  smpl_out = vqvae_model.vqvae.postprocess(x_dec)
380
  params_np = smpl_out.squeeze(0).cpu().numpy()
381
-
382
- if mean is not None and std is not None:
383
  params_np = (params_np * np.array(std).reshape(1, -1)) + np.array(mean).reshape(1, -1)
 
384
  return params_np
385
 
 
386
  def params_to_vertices(params_seq: np.ndarray) -> tuple:
387
  smplx_model = _model_cache["smplx_model"]
388
- if smplx_model is None: return None, None
 
389
 
390
  starts = np.cumsum([0] + PARAM_DIMS[:-1])
391
  ends = starts + np.array(PARAM_DIMS)
392
  T = params_seq.shape[0]
393
  all_verts = []
394
-
395
- # Process in chunks to avoid memory issues on CPU spaces
396
- batch_size = 10
397
 
398
  with torch.no_grad():
399
  for s in range(0, T, batch_size):
400
  batch = params_seq[s:s+batch_size]
 
 
401
  np_parts = {name: batch[:, st:ed].astype(np.float32) for name, st, ed in zip(PARAM_NAMES, starts, ends)}
402
  tensor_parts = {name: torch.from_numpy(arr).to(DEVICE) for name, arr in np_parts.items()}
403
 
404
- # Simple handling for body pose/orient split
405
  body_t = tensor_parts['body_pose']
406
- # Assumption: Model output matches SMPL-X expectations.
407
- # Simplified logic for demo stability:
408
- global_orient = body_t[:, :3].contiguous()
409
- body_pose_only = body_t[:, 3:66].contiguous() # Trim to standard 63 if needed, or keep dynamic
410
-
411
- try:
412
- out = smplx_model(
413
- betas=tensor_parts['betas'], global_orient=global_orient, body_pose=body_pose_only,
414
- left_hand_pose=tensor_parts['left_hand_pose'], right_hand_pose=tensor_parts['right_hand_pose'],
415
- expression=tensor_parts['expression'], jaw_pose=tensor_parts['jaw_pose'],
416
- leye_pose=tensor_parts['eye_pose'], reye_pose=tensor_parts['eye_pose'],
417
- transl=tensor_parts['trans'], return_verts=True
418
- )
419
- all_verts.append(out.vertices.detach().cpu().numpy())
420
- except Exception as e:
421
- print(f"SMPL-X Forward pass error: {e}")
422
- return None, None
423
-
424
- if not all_verts: return None, None
 
 
 
 
 
 
 
 
425
  return np.concatenate(all_verts, axis=0), smplx_model.faces.astype(np.int32)
426
 
427
- def compute_upper_body_bounds(verts):
428
- if verts is None: return None
 
 
 
 
429
  v = verts[0]
430
  y_min, y_max = v[:, 1].min(), v[:, 1].max()
431
  x_min, x_max = v[:, 0].min(), v[:, 0].max()
432
  z_min, z_max = v[:, 2].min(), v[:, 2].max()
 
433
  body_height = y_max - y_min
434
  waist_y = y_min + body_height * 0.45
 
 
 
 
435
 
436
- # Add margins
437
  return {
438
- 'y_range': [waist_y, y_max + 0.1],
439
- 'x_range': [x_min - 0.2, x_max + 0.2],
440
- 'z_range': [z_min - 0.2, z_max + 0.2],
441
- 'center': [(x_min + x_max)/2, (waist_y + y_max)/2, (z_min + z_max)/2]
 
 
442
  }
443
 
444
  # =====================================================================
445
- # HTML Generation
446
  # =====================================================================
447
- def create_animation_html(verts, faces, upper_body_only=True, title=""):
448
- if verts is None: return create_error_html("Model generation failed.")
 
 
 
449
 
450
- T = verts.shape[0]
451
  i, j, k = faces.T.tolist()
 
 
452
  bounds = compute_upper_body_bounds(verts) if upper_body_only else None
453
 
454
- mesh = go.Mesh3d(x=verts[0,:,0], y=verts[0,:,1], z=verts[0,:,2], i=i, j=j, k=k,
455
- color='#6FA8DC', opacity=0.8, flatshading=True)
 
 
 
 
456
 
457
- frames = [go.Frame(data=[go.Mesh3d(x=verts[t,:,0], y=verts[t,:,1], z=verts[t,:,2], i=i, j=j, k=k)], name=str(t)) for t in range(T)]
 
 
 
 
 
 
 
 
 
 
 
458
 
459
- scene_cfg = dict(aspectmode='data', xaxis=dict(visible=False), yaxis=dict(visible=False), zaxis=dict(visible=False))
460
- if bounds:
461
- scene_cfg.update(dict(
462
- xaxis=dict(range=bounds['x_range'], visible=False),
463
- yaxis=dict(range=bounds['y_range'], visible=False),
464
- zaxis=dict(range=bounds['z_range'], visible=False),
465
- aspectmode='manual', aspectratio=dict(x=1, y=1, z=1),
466
- camera=dict(eye=dict(x=0, y=0.5, z=2.0))
467
- ))
468
-
469
  fig = go.Figure(data=[mesh], frames=frames)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
470
  fig.update_layout(
471
- title=title, scene=scene_cfg, height=500, margin=dict(l=0, r=0, t=30, b=0),
472
- updatemenus=[dict(type="buttons", buttons=[dict(label="Play", method="animate", args=[None, {"frame": {"duration": 50}}])])]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
473
  )
474
- return fig.to_html(include_plotlyjs='cdn', full_html=True)
475
 
476
- def create_side_by_side_html(verts1, faces1, verts2, faces2, title1="", title2=""):
477
- if verts1 is None or verts2 is None: return create_error_html("One or both models failed.")
 
 
 
 
478
  T = min(verts1.shape[0], verts2.shape[0])
479
  verts1, verts2 = verts1[:T], verts2[:T]
 
480
  i1, j1, k1 = faces1.T.tolist()
481
  i2, j2, k2 = faces2.T.tolist()
 
482
 
483
- fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'scene'}, {'type': 'scene'}]], subplot_titles=[title1, title2])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
484
 
485
- fig.add_trace(go.Mesh3d(x=verts1[0,:,0], y=verts1[0,:,1], z=verts1[0,:,2], i=i1, j=j1, k1=k1, color='#6FA8DC'), row=1, col=1)
486
- fig.add_trace(go.Mesh3d(x=verts2[0,:,0], y=verts2[0,:,1], z=verts2[0,:,2], i=i2, j=j2, k2=k2, color='#93C47D'), row=1, col=2)
487
 
488
  frames = []
489
  for t in range(T):
490
- frames.append(go.Frame(data=[
491
- go.Mesh3d(x=verts1[t,:,0], y=verts1[t,:,1], z=verts1[t,:,2], i=i1, j=j1, k=k1),
492
- go.Mesh3d(x=verts2[t,:,0], y=verts2[t,:,1], z=verts2[t,:,2], i=i2, j=j2, k=k2)
493
- ], name=str(t)))
494
-
 
 
 
 
 
 
495
  fig.frames = frames
496
 
497
- # Generic simple camera
498
- cam = dict(eye=dict(x=0, y=0, z=2.2), up=dict(x=0, y=1, z=0))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
499
  fig.update_layout(
500
- scene=dict(xaxis=dict(visible=False), yaxis=dict(visible=False), zaxis=dict(visible=False), camera=cam, aspectmode='data'),
501
- scene2=dict(xaxis=dict(visible=False), yaxis=dict(visible=False), zaxis=dict(visible=False), camera=cam, aspectmode='data'),
502
- height=500, margin=dict(l=0, r=0, t=30, b=0),
503
- updatemenus=[dict(type="buttons", buttons=[dict(label="Play", method="animate", args=[None, {"frame": {"duration": 50}}])])]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
504
  )
505
- return fig.to_html(include_plotlyjs='cdn', full_html=True)
506
 
507
- def create_iframe_html(html_content):
508
- escaped = html_module.escape(html_content)
509
- return f'<iframe srcdoc="{escaped}" style="width: 100%; height: 520px; border: none;"></iframe>'
510
 
511
- def create_error_html(msg):
512
- return f'<div style="text-align:center; padding:50px;">{msg}</div>'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
513
 
514
- def create_placeholder_html():
515
- return '<div style="text-align:center; padding:50px; color:#666;">Enter a word to generate animation</div>'
 
 
 
 
 
516
 
517
  # =====================================================================
518
- # Main Generators
519
  # =====================================================================
520
- def generate_verts_for_word(word, pid):
521
- gen_tokens = generate_motion_tokens(word, pid)
522
- ids = parse_motion_tokens(gen_tokens)
523
- if not ids: return None, None, gen_tokens
524
- params = decode_tokens_to_params(ids)
 
 
 
 
 
 
 
 
 
 
525
  verts, faces = params_to_vertices(params)
526
- return verts, faces, gen_tokens
 
527
 
528
- def generate_animation_for_word(word, pid, upper_body_only=True):
 
529
  verts, faces, tokens = generate_verts_for_word(word, pid)
530
- html = create_animation_html(verts, faces, upper_body_only, title=pid)
531
- return html, tokens
 
 
 
 
 
532
 
533
- def process_word(word):
534
- if not _model_cache["initialized"]: initialize_models()
 
 
535
 
536
  word = word.strip().lower()
 
537
  pids = get_random_pids_for_word(word, 2)
538
 
539
  if not pids:
540
- return create_iframe_html(create_error_html(f"Word '{word}' not found in dataset.")), ""
541
 
542
- if len(pids) == 1: pids = [pids[0], pids[0]]
 
543
 
544
  try:
545
- verts1, faces1, tok1 = generate_verts_for_word(word, pids[0])
546
- verts2, faces2, tok2 = generate_verts_for_word(word, pids[1])
547
 
548
  if verts1 is None and verts2 is None:
549
- return create_iframe_html(create_error_html("Motion generation failed.")), f"{tok1}\n{tok2}"
550
 
551
- # If one fails, show single
552
- if verts1 is None: return create_iframe_html(create_animation_html(verts2, faces2, title=pids[1])), tok2
553
- if verts2 is None: return create_iframe_html(create_animation_html(verts1, faces1, title=pids[0])), tok1
554
-
555
- html = create_side_by_side_html(verts1, faces1, verts2, faces2, title1=pids[0], title2=pids[1])
556
- return create_iframe_html(html), f"[{pids[0]}] {tok1}\n\n[{pids[1]}] {tok2}"
 
 
 
 
 
557
 
558
  except Exception as e:
559
- return create_iframe_html(create_error_html(f"Error: {str(e)}")), ""
560
 
561
- def get_example(word, pid):
562
- if not _model_cache["initialized"]: initialize_models()
 
563
  key = f"{word}_{pid}"
564
  if key in _example_cache:
565
- return create_iframe_html(_example_cache[key]["html"]), _example_cache[key]["tokens"]
566
- # Generate on fly if cache miss
567
- html, tok = generate_animation_for_word(word, pid)
568
- return create_iframe_html(html), tok
569
 
570
  # =====================================================================
571
- # App Launch
572
  # =====================================================================
573
- def create_ui():
574
- initialize_models()
575
- precompute_examples()
 
 
 
 
 
 
 
576
 
577
- with gr.Blocks(title="SignMotionGPT", theme=gr.themes.Default()) as demo:
 
578
  gr.Markdown("# SignMotionGPT Demo")
579
- gr.Markdown("Input a word to generate sign language motion.")
580
-
581
  with gr.Row():
582
- with gr.Column(scale=1):
583
- txt_input = gr.Textbox(label="Word", placeholder="e.g. hello, help, computer")
584
- btn = gr.Button("Generate", variant="primary")
585
- txt_out = gr.Textbox(label="Generated Tokens", lines=5)
 
 
 
 
 
 
586
 
587
- # Examples
588
- if _example_cache:
589
- gr.Markdown("### Examples")
590
- for k, v in _example_cache.items():
591
- gr.Button(f"{v['word']} ({v['pid']})").click(
592
- fn=lambda w=v['word'], p=v['pid']: get_example(w, p),
593
- outputs=[gr.HTML(), txt_out] # Hack: we need to target the main output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
594
  )
595
- # To keep UI simple, I'll just skip complex example buttons in this condensed version
596
- # and rely on the user typing.
597
-
598
- with gr.Column(scale=2):
599
- html_out = gr.HTML(label="Visual", value=create_iframe_html(create_placeholder_html()))
600
-
601
- # Wire up
602
- btn.click(process_word, inputs=[txt_input], outputs=[html_out, txt_out])
603
- txt_input.submit(process_word, inputs=[txt_input], outputs=[html_out, txt_out])
 
 
 
 
 
 
604
 
 
 
 
 
 
 
605
  return demo
606
 
607
- if __name__ == "__main__":
608
- # Initialize immediately on startup to fail fast if files missing
609
- try:
610
- initialize_models()
611
- except Exception as e:
612
- print(f"Startup initialization warning: {e}")
 
 
 
 
 
 
 
 
 
 
 
613
 
614
- demo = create_ui()
615
- # In Spaces, simply use .launch() without arguments
616
- demo.launch()
 
 
 
1
+ """
2
+ SignMotionGPT - HuggingFace Spaces Demo
3
+ Text-to-Sign Language Motion Generation
4
+ """
5
  import os
6
  import sys
7
  import re
8
  import json
9
  import random
 
10
  import warnings
11
  import html as html_module
 
 
12
 
13
  import torch
14
  import numpy as np
 
 
 
 
 
 
 
 
 
15
 
16
  warnings.filterwarnings("ignore")
17
 
18
  # =====================================================================
19
+ # Configuration for HuggingFace Spaces
20
  # =====================================================================
21
+ WORK_DIR = os.getcwd()
 
 
 
 
 
22
  DATA_DIR = os.path.join(WORK_DIR, "data")
23
  os.makedirs(DATA_DIR, exist_ok=True)
24
 
 
28
  STATS_PATH = os.path.join(DATA_DIR, "vqvae_stats.pt")
29
  SMPLX_MODEL_DIR = os.path.join(DATA_DIR, "smplx_models")
30
 
31
+ # HuggingFace model config
32
+ HF_REPO_ID = os.environ.get("HF_REPO_ID", "rdz-falcon/SignMotionGPTfit-archive")
33
+ HF_SUBFOLDER = os.environ.get("HF_SUBFOLDER", "stage2_v2/epoch-030")
34
+
35
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
36
 
37
+ # Generation parameters
38
  M_START = "<M_START>"
39
  M_END = "<M_END>"
40
  PAD_TOKEN = "<PAD>"
 
 
41
  INFERENCE_TEMPERATURE = 0.7
42
  INFERENCE_TOP_K = 50
43
  INFERENCE_REPETITION_PENALTY = 1.2
44
 
45
+ # VQ-VAE parameters
46
  SMPL_DIM = 182
47
  CODEBOOK_SIZE = 512
48
  CODE_DIM = 512
 
56
  "trans", "expression", "jaw_pose", "eye_pose"]
57
 
58
  # =====================================================================
59
+ # Install/Import Dependencies
60
  # =====================================================================
61
+ try:
62
+ import gradio as gr
63
+ except ImportError:
64
+ os.system("pip install -q gradio>=4.0.0")
65
+ import gradio as gr
 
 
66
 
67
+ try:
68
+ import plotly.graph_objects as go
69
+ from plotly.subplots import make_subplots
70
+ except ImportError:
71
+ os.system("pip install -q plotly>=5.18.0")
72
+ import plotly.graph_objects as go
73
+ from plotly.subplots import make_subplots
 
74
 
75
+ try:
76
+ import smplx
77
+ except ImportError:
78
+ os.system("pip install -q smplx==0.1.28")
79
+ import smplx
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
+ from transformers import AutoModelForCausalLM, AutoTokenizer
82
 
83
  # =====================================================================
84
  # Import VQ-VAE architecture
85
  # =====================================================================
86
+ # Add parent directory to path for mGPT imports
87
+ current_dir = os.path.dirname(os.path.abspath(__file__))
88
+ parent_dir = os.path.dirname(current_dir)
89
+ if parent_dir not in sys.path:
90
+ sys.path.insert(0, parent_dir)
91
+ if current_dir not in sys.path:
92
+ sys.path.insert(0, current_dir)
93
 
94
  try:
 
95
  from mGPT.archs.mgpt_vq import VQVae
96
  except ImportError as e:
97
+ print(f"Warning: Could not import VQVae: {e}")
98
  VQVae = None
99
 
100
  # =====================================================================
 
109
  "initialized": False
110
  }
111
 
112
+ _word_pid_map = {} # word -> list of valid PIDs
113
+ _example_cache = {} # Pre-computed example animations
114
 
115
  # =====================================================================
116
+ # Dataset Loading - Word to PID mapping
117
  # =====================================================================
118
  def load_word_pid_mapping():
119
+ """Load the dataset and build word -> PIDs mapping."""
120
  global _word_pid_map
121
+
122
  if not os.path.exists(DATASET_PATH):
123
  print(f"Dataset not found: {DATASET_PATH}")
124
  return
 
136
  _word_pid_map[word] = set()
137
  _word_pid_map[word].add(pid)
138
 
139
+ # Convert sets to sorted lists
140
  for word in _word_pid_map:
141
  _word_pid_map[word] = sorted(list(_word_pid_map[word]))
142
+
143
  print(f"Loaded {len(_word_pid_map)} unique words from dataset")
144
  except Exception as e:
145
  print(f"Error loading dataset: {e}")
146
 
147
+
148
  def get_pids_for_word(word: str) -> list:
149
+ """Get valid PIDs for a word from the dataset."""
150
+ word = word.lower().strip()
151
+ return _word_pid_map.get(word, [])
152
+
153
 
154
  def get_random_pids_for_word(word: str, count: int = 2) -> list:
155
+ """Get random PIDs for a word. Returns up to 'count' PIDs."""
156
  pids = get_pids_for_word(word)
157
+ if not pids:
158
+ return []
159
+ if len(pids) <= count:
160
+ return pids
161
  return random.sample(pids, count)
162
 
163
+
164
  def get_example_words_with_pids(count: int = 3) -> list:
165
+ """Get example words with valid PIDs from dataset."""
166
  examples = []
167
  preferred = ['push', 'passport', 'library', 'send', 'college', 'help', 'thank', 'hello']
168
+
169
  for word in preferred:
170
  pids = get_pids_for_word(word)
171
  if pids:
172
  examples.append((word, pids[0]))
173
+ if len(examples) >= count:
174
+ break
175
 
176
  if len(examples) < count:
177
  available = [w for w in _word_pid_map.keys() if w not in [e[0] for e in examples]]
178
+ random.shuffle(available)
179
+ for word in available[:count - len(examples)]:
180
+ pids = _word_pid_map[word]
181
+ examples.append((word, pids[0]))
182
+
183
  return examples
184
 
185
  # =====================================================================
 
196
  )
197
 
198
  # =====================================================================
199
+ # Model Loading Functions
200
  # =====================================================================
201
  def load_llm_model():
202
  print(f"Loading LLM from: {HF_REPO_ID}/{HF_SUBFOLDER}")
203
+ token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
204
+
205
+ tokenizer = AutoTokenizer.from_pretrained(
206
+ HF_REPO_ID, subfolder=HF_SUBFOLDER, trust_remote_code=True, token=token
207
+ )
208
+ model = AutoModelForCausalLM.from_pretrained(
209
+ HF_REPO_ID, subfolder=HF_SUBFOLDER, trust_remote_code=True, token=token,
210
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
211
+ )
212
+ if tokenizer.pad_token is None:
213
+ tokenizer.add_special_tokens({"pad_token": PAD_TOKEN})
214
+ model.resize_token_embeddings(len(tokenizer))
215
+ model.config.pad_token_id = tokenizer.pad_token_id
216
+ model.to(DEVICE)
217
+ model.eval()
218
+ print(f"LLM loaded (vocab size: {len(tokenizer)})")
219
+ return model, tokenizer
220
+
 
 
221
 
222
def load_vqvae_model():
    """Load the motion VQ-VAE from its local checkpoint, or None if absent."""
    if not os.path.exists(VQVAE_CHECKPOINT):
        print(f"VQ-VAE checkpoint not found: {VQVAE_CHECKPOINT}")
        return None

    print(f"Loading VQ-VAE from: {VQVAE_CHECKPOINT}")
    vqvae = MotionGPT_VQVAE_Wrapper(
        smpl_dim=SMPL_DIM, codebook_size=CODEBOOK_SIZE, code_dim=CODE_DIM, **VQ_ARGS
    ).to(DEVICE)

    checkpoint = torch.load(VQVAE_CHECKPOINT, map_location=DEVICE, weights_only=False)
    # The checkpoint may wrap weights under 'model_state_dict' or be a raw state dict.
    weights = checkpoint.get('model_state_dict', checkpoint)
    # strict=False tolerates extra/missing keys between checkpoint and model.
    vqvae.load_state_dict(weights, strict=False)
    vqvae.eval()
    print("VQ-VAE loaded")
    return vqvae
234
+
 
 
235
 
236
def load_stats():
    """Return (mean, std) normalization stats as numpy values, or (None, None)."""
    if not os.path.exists(STATS_PATH):
        return None, None

    stats = torch.load(STATS_PATH, map_location='cpu', weights_only=False)

    def _as_numpy(value):
        # Stats may be stored as tensors or as plain scalars.
        return value.cpu().numpy() if torch.is_tensor(value) else value

    return _as_numpy(stats.get('mean', 0)), _as_numpy(stats.get('std', 1))
244
+
 
 
 
245
 
246
def load_smplx_model():
    """Instantiate a neutral SMPL-X body model from the local model directory."""
    if not os.path.exists(SMPLX_MODEL_DIR):
        print(f"SMPL-X directory not found: {SMPLX_MODEL_DIR}")
        return None

    print(f"Loading SMPL-X from: {SMPLX_MODEL_DIR}")
    body_model = smplx.SMPLX(
        model_path=SMPLX_MODEL_DIR,
        model_type='smplx',
        gender='neutral',
        use_pca=False,  # full per-joint hand poses instead of PCA components
        create_global_orient=True,
        create_body_pose=True,
        create_betas=True,
        create_expression=True,
        create_jaw_pose=True,
        create_left_hand_pose=True,
        create_right_hand_pose=True,
        create_transl=True,
    ).to(DEVICE)
    print("SMPL-X loaded")
    return body_model
259
+
 
 
260
 
261
def initialize_models():
    """Populate the global model cache once; subsequent calls are no-ops."""
    global _model_cache
    if _model_cache["initialized"]:
        return

    print("\n" + "=" * 60)
    print(" Initializing SignMotionGPT Models")
    print("=" * 60)

    # Word -> PID lookup comes from the dataset file.
    load_word_pid_mapping()

    _model_cache["llm_model"], _model_cache["llm_tokenizer"] = load_llm_model()

    # Visualization components are optional: token generation still works
    # without them, so failures here are logged rather than raised.
    try:
        _model_cache["vqvae_model"] = load_vqvae_model()
        _model_cache["stats"] = load_stats()
        _model_cache["smplx_model"] = load_smplx_model()
    except Exception as exc:
        print(f"Could not load visualization models: {exc}")

    _model_cache["initialized"] = True
    print("All models initialized")
    print("=" * 60)
+
286
 
287
def precompute_examples():
    """Pre-compute animations for example words at startup."""
    global _example_cache
    if not _model_cache["initialized"]:
        return

    examples = get_example_words_with_pids(3)
    print(f"\nPre-computing {len(examples)} example animations...")

    for word, pid in examples:
        cache_key = f"{word}_{pid}"
        print(f" Computing: {word} ({pid})...")
        try:
            html, tokens = generate_animation_for_word(word, pid, upper_body_only=True)
            _example_cache[cache_key] = {"html": html, "tokens": tokens, "word": word, "pid": pid}
            print(f" Done: {word}")
        except Exception as exc:
            # Cache an error placeholder so the corresponding UI row still renders.
            print(f" Failed: {word} - {exc}")
            _example_cache[cache_key] = {"html": create_error_html(), "tokens": "", "word": word, "pid": pid}

    print("Example pre-computation complete\n")
310
 
311
  # =====================================================================
312
+ # Motion Generation Functions
313
  # =====================================================================
314
  def generate_motion_tokens(word: str, variant: str) -> str:
315
  model = _model_cache["llm_model"]
316
  tokenizer = _model_cache["llm_tokenizer"]
317
+
318
+ if model is None or tokenizer is None:
319
+ raise RuntimeError("LLM model not loaded")
320
 
321
  prompt = f"Instruction: Generate motion for word '{word}' with variant '{variant}'.\nMotion: "
322
  inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
 
330
  eos_token_id=tokenizer.convert_tokens_to_ids(M_END),
331
  early_stopping=True
332
  )
333
+
334
  decoded = tokenizer.decode(output[0], skip_special_tokens=False)
335
  motion_part = decoded.split("Motion: ")[-1] if "Motion: " in decoded else decoded
336
  return motion_part.strip()
337
 
338
+
339
def parse_motion_tokens(token_str: str) -> list:
    """Extract integer motion-token ids from generated LLM output.

    Already-numeric sequences (list/tuple/ndarray) are passed through as a
    list of ints. Strings are scanned for the '<M123>' format first, then
    the legacy '<motion_123>' format. Anything else yields [].
    """
    if isinstance(token_str, (list, tuple, np.ndarray)):
        return [int(value) for value in token_str]
    if not isinstance(token_str, str):
        return []

    # First matching format wins; both are never mixed in one result.
    for pattern in (r'<M(\d+)>', r'<motion_(\d+)>'):
        found = re.findall(pattern, token_str)
        if found:
            return [int(value) for value in found]

    return []
354
 
355
+
356
def decode_tokens_to_params(tokens: list) -> np.ndarray:
    """Decode discrete motion-token ids into a (T, SMPL_DIM) parameter sequence.

    Dequantizes `tokens` into code embeddings, runs the VQ-VAE decoder and
    postprocess, then de-normalizes with the cached dataset mean/std.
    Returns an empty (0, SMPL_DIM) float32 array when the VQ-VAE is missing,
    `tokens` is empty, or no dequantization path is available.
    """
    vqvae_model = _model_cache["vqvae_model"]
    mean, std = _model_cache["stats"]

    if vqvae_model is None or not tokens:
        return np.zeros((0, SMPL_DIM), dtype=np.float32)

    # Batch of one token sequence: shape (1, T).
    idx = torch.tensor(tokens, dtype=torch.long, device=DEVICE).unsqueeze(0)
    T_q = idx.shape[1]
    quantizer = vqvae_model.vqvae.quantizer

    # Read code_dim from the actual codebook when possible so the shape
    # checks below match the loaded checkpoint; otherwise use the configured value.
    if hasattr(quantizer, "codebook"):
        codebook = quantizer.codebook.to(DEVICE)  # assumed (num_codes, code_dim) — TODO confirm
        code_dim = codebook.shape[1]
    else:
        code_dim = CODE_DIM

    # Preferred path: let the quantizer dequantize the indices itself. The
    # decoder expects channels-first (1, code_dim, T), so a (1, T, code_dim)
    # result is permuted. Any failure falls through to the manual lookup.
    # NOTE(review): if T_q == code_dim the two layouts are indistinguishable
    # and the channels-first interpretation wins.
    x_quantized = None
    if hasattr(quantizer, "dequantize"):
        try:
            with torch.no_grad():
                dq = quantizer.dequantize(idx)
                if dq is not None:
                    dq = dq.contiguous()
                    if dq.ndim == 3 and dq.shape[1] == code_dim:
                        x_quantized = dq
                    elif dq.ndim == 3 and dq.shape[1] == T_q:
                        x_quantized = dq.permute(0, 2, 1).contiguous()
        except Exception:
            pass

    # Fallback: direct codebook lookup, (1, T, code_dim) -> (1, code_dim, T).
    if x_quantized is None:
        if not hasattr(quantizer, "codebook"):
            return np.zeros((0, SMPL_DIM), dtype=np.float32)
        with torch.no_grad():
            emb = codebook[idx]
            x_quantized = emb.permute(0, 2, 1).contiguous()

    with torch.no_grad():
        x_dec = vqvae_model.vqvae.decoder(x_quantized)
        smpl_out = vqvae_model.vqvae.postprocess(x_dec)
        params_np = smpl_out.squeeze(0).cpu().numpy()

    # De-normalize back to raw SMPL-X parameter space.
    if (mean is not None) and (std is not None):
        params_np = (params_np * np.array(std).reshape(1, -1)) + np.array(mean).reshape(1, -1)

    return params_np
403
 
404
+
405
def params_to_vertices(params_seq: np.ndarray) -> tuple:
    """Run a (T, SMPL_DIM) parameter sequence through SMPL-X.

    Splits each frame into the named parameter groups (PARAM_NAMES /
    PARAM_DIMS), forwards them through the SMPL-X model in batches of 32,
    and returns (vertices, faces) where vertices has shape (T, V, 3).
    Returns (None, None) when SMPL-X is unavailable or the sequence is empty.
    """
    smplx_model = _model_cache["smplx_model"]
    if smplx_model is None or params_seq.shape[0] == 0:
        return None, None

    # Per-group [start, end) column offsets into the flat parameter vector.
    starts = np.cumsum([0] + PARAM_DIMS[:-1])
    ends = starts + np.array(PARAM_DIMS)
    T = params_seq.shape[0]
    all_verts = []
    batch_size = 32
    num_body_joints = getattr(smplx_model, "NUM_BODY_JOINTS", 21)

    with torch.no_grad():
        for s in range(0, T, batch_size):
            batch = params_seq[s:s+batch_size]
            B = batch.shape[0]

            np_parts = {name: batch[:, st:ed].astype(np.float32) for name, st, ed in zip(PARAM_NAMES, starts, ends)}
            tensor_parts = {name: torch.from_numpy(arr).to(DEVICE) for name, arr in np_parts.items()}

            # The stored 'body_pose' block may or may not include the 3-dim
            # global orientation as its first joint; disambiguate by width.
            body_t = tensor_parts['body_pose']
            L_body = body_t.shape[1]
            expected_no_go = num_body_joints * 3
            expected_with_go = (num_body_joints + 1) * 3

            if L_body == expected_with_go:
                # Orientation packed in front of the body joints.
                global_orient = body_t[:, :3].contiguous()
                body_pose_only = body_t[:, 3:].contiguous()
            elif L_body == expected_no_go:
                # Body joints only; use identity orientation.
                global_orient = torch.zeros((B, 3), dtype=torch.float32, device=DEVICE)
                body_pose_only = body_t
            else:
                # Unexpected width: best-effort split or zero-pad to the
                # joint count SMPL-X expects.
                if L_body > expected_no_go:
                    global_orient = body_t[:, :3].contiguous()
                    body_pose_only = body_t[:, 3:].contiguous()
                else:
                    body_pose_only = torch.nn.functional.pad(body_t, (0, max(0, expected_no_go - L_body)))
                    global_orient = torch.zeros((B, 3), dtype=torch.float32, device=DEVICE)

            # Both eyes share the single stored 'eye_pose' block.
            out = smplx_model(
                betas=tensor_parts['betas'], global_orient=global_orient, body_pose=body_pose_only,
                left_hand_pose=tensor_parts['left_hand_pose'], right_hand_pose=tensor_parts['right_hand_pose'],
                expression=tensor_parts['expression'], jaw_pose=tensor_parts['jaw_pose'],
                leye_pose=tensor_parts['eye_pose'], reye_pose=tensor_parts['eye_pose'],
                transl=tensor_parts['trans'], return_verts=True
            )
            all_verts.append(out.vertices.detach().cpu().numpy())

    return np.concatenate(all_verts, axis=0), smplx_model.faces.astype(np.int32)
454
 
455
+
456
def compute_upper_body_bounds(verts: np.ndarray) -> dict:
    """Compute bounds for upper body view. SMPL-X: Y is up, Z is forward.

    Uses the first frame only to derive the waist line (45% of body height)
    and padded x/z extents, returning ranges suitable for fixing a Plotly
    scene on the upper body. Returns None when `verts` is None or empty.
    NOTE(review): later frames may extend past these frame-0 bounds.
    """
    if verts is None or verts.shape[0] == 0:
        return None

    first_frame = verts[0]
    x_lo, x_hi = first_frame[:, 0].min(), first_frame[:, 0].max()
    y_lo, y_hi = first_frame[:, 1].min(), first_frame[:, 1].max()
    z_lo, z_hi = first_frame[:, 2].min(), first_frame[:, 2].max()

    height = y_hi - y_lo
    waist = y_lo + height * 0.45
    mid_upper = (waist + y_hi) / 2

    pad_x = (x_hi - x_lo) * 0.15
    pad_z = (z_hi - z_lo) * 0.15

    return {
        'waist_y': waist,
        'upper_center_y': mid_upper,
        'y_range': [waist - height * 0.05, y_hi + height * 0.05],
        'x_range': [x_lo - pad_x, x_hi + pad_x],
        'z_range': [z_lo - pad_z, z_hi + pad_z],
        'center': [(x_lo + x_hi) / 2, mid_upper, (z_lo + z_hi) / 2],
    }
481
 
482
  # =====================================================================
483
+ # Visualization Functions
484
  # =====================================================================
485
def create_animation_html(verts: np.ndarray, faces: np.ndarray, fps: int = 20,
                          upper_body_only: bool = True, title: str = "") -> str:
    """Create Plotly animation HTML.

    Builds a single-scene 3D mesh animation (one go.Frame per timestep) with
    Play/Pause/Reset buttons and a frame slider, and returns it as a full,
    self-contained HTML document (plotly.js from CDN). Falls back to the
    static placeholder when inputs are missing or empty.
    """
    if verts is None or faces is None or verts.shape[0] == 0:
        return create_placeholder_html()

    T, V, _ = verts.shape
    # Plotly Mesh3d wants the triangle index columns as separate lists.
    i, j, k = faces.T.tolist()
    frame_duration = 1000 // fps  # ms per frame

    bounds = compute_upper_body_bounds(verts) if upper_body_only else None

    # Initial mesh (frame 0).
    mesh = go.Mesh3d(
        x=verts[0, :, 0], y=verts[0, :, 1], z=verts[0, :, 2],
        i=i, j=j, k=k, flatshading=True, opacity=0.6,
        color='#6FA8DC',
        lighting=dict(ambient=0.6, diffuse=0.7, specular=0.2)
    )

    # One frame per timestep; frame names are the string index so the
    # slider/buttons can address them.
    frames = [
        go.Frame(
            data=[go.Mesh3d(
                x=verts[t, :, 0], y=verts[t, :, 1], z=verts[t, :, 2],
                i=i, j=j, k=k, flatshading=True, opacity=0.6,
                color='#6FA8DC',
                lighting=dict(ambient=0.6, diffuse=0.7, specular=0.2)
            )],
            name=str(t)
        )
        for t in range(T)
    ]

    fig = go.Figure(data=[mesh], frames=frames)

    # Frame slider; labels only every 10th frame to avoid clutter.
    sliders = [dict(
        active=0, yanchor="top", xanchor="left",
        currentvalue=dict(font=dict(size=12), prefix="Frame: ", visible=True, xanchor="right"),
        pad=dict(b=5, t=30), len=0.75, x=0.2, y=0.02,
        steps=[
            dict(args=[[str(t)], dict(frame=dict(duration=frame_duration, redraw=True), mode="immediate", transition=dict(duration=0))],
                 label=str(t) if t % 10 == 0 else "", method="animate")
            for t in range(T)
        ]
    )]

    # Fixed upper-body framing when bounds are available; otherwise let
    # Plotly auto-fit the whole body.
    if bounds and upper_body_only:
        scene_config = dict(
            aspectmode='manual', aspectratio=dict(x=1, y=1.2, z=1),
            xaxis=dict(visible=False, showbackground=False, range=bounds['x_range']),
            yaxis=dict(visible=False, showbackground=False, range=bounds['y_range']),
            zaxis=dict(visible=False, showbackground=False, range=bounds['z_range']),
            camera=dict(
                eye=dict(x=0, y=bounds['center'][1] * 0.1, z=2.5),
                center=dict(x=0, y=bounds['center'][1], z=0),
                up=dict(x=0, y=1, z=0)
            ),
            bgcolor='rgba(250,250,250,1)'
        )
    else:
        scene_config = dict(
            aspectmode='data',
            xaxis=dict(visible=False, showbackground=False),
            yaxis=dict(visible=False, showbackground=False),
            zaxis=dict(visible=False, showbackground=False),
            camera=dict(eye=dict(x=0, y=0, z=2.5), up=dict(x=0, y=1, z=0)),
            bgcolor='rgba(250,250,250,1)'
        )

    # Optional title rendered as a paper-anchored annotation.
    annotations = []
    if title:
        annotations.append(dict(
            text=f"<b>{title}</b>",
            x=0.5, y=1.0, xref="paper", yref="paper",
            showarrow=False, font=dict(size=14),
            xanchor="center", yanchor="bottom"
        ))

    fig.update_layout(
        scene=scene_config,
        annotations=annotations,
        updatemenus=[dict(
            type="buttons", showactive=True,
            x=0.02, y=0.02, xanchor="left", yanchor="bottom",
            pad=dict(t=0, r=10), direction="right",
            buttons=[
                dict(label="Play", method="animate",
                     args=[None, {"frame": {"duration": frame_duration, "redraw": True}, "fromcurrent": True, "transition": {"duration": 0}}]),
                dict(label="Pause", method="animate",
                     args=[[None], {"frame": {"duration": 0, "redraw": False}, "mode": "immediate"}]),
                dict(label="Reset", method="animate",
                     args=[["0"], {"frame": {"duration": 0, "redraw": True}, "mode": "immediate"}])
            ]
        )],
        sliders=sliders,
        height=500,
        margin=dict(l=0, r=0, t=30 if title else 10, b=60),
        paper_bgcolor='rgba(250,250,250,1)',
        plot_bgcolor='rgba(250,250,250,1)'
    )

    return fig.to_html(
        include_plotlyjs='cdn', full_html=True,
        config={'displayModeBar': True, 'displaylogo': False, 'scrollZoom': True,
                'modeBarButtonsToRemove': ['lasso2d', 'select2d', 'toImage']}
    )
 
590
 
591
+
592
def create_side_by_side_html(verts1, faces1, verts2, faces2, title1="", title2="", fps=20) -> str:
    """Create side-by-side animation HTML for two avatars.

    Renders two meshes in separate 3D subplots (blue left, green right) that
    share one Play/Pause/Reset control and one frame slider, truncating both
    sequences to the shorter length so they stay in sync. Returns a full,
    self-contained HTML document.
    """
    if verts1 is None or verts2 is None:
        return create_placeholder_html()

    # Animate both avatars for the same number of frames.
    T = min(verts1.shape[0], verts2.shape[0])
    verts1, verts2 = verts1[:T], verts2[:T]

    i1, j1, k1 = faces1.T.tolist()
    i2, j2, k2 = faces2.T.tolist()
    frame_duration = 1000 // fps  # ms per frame

    bounds1 = compute_upper_body_bounds(verts1)
    bounds2 = compute_upper_body_bounds(verts2)

    fig = make_subplots(
        rows=1, cols=2,
        specs=[[{'type': 'scene'}, {'type': 'scene'}]],
        horizontal_spacing=0.02,
        subplot_titles=[title1, title2]
    )

    # Initial (frame 0) meshes, each bound to its own subplot scene.
    mesh1 = go.Mesh3d(
        x=verts1[0, :, 0], y=verts1[0, :, 1], z=verts1[0, :, 2],
        i=i1, j=j1, k=k1, flatshading=True, opacity=0.6, color='#6FA8DC',
        lighting=dict(ambient=0.6, diffuse=0.7, specular=0.2), scene='scene'
    )
    mesh2 = go.Mesh3d(
        x=verts2[0, :, 0], y=verts2[0, :, 1], z=verts2[0, :, 2],
        i=i2, j=j2, k=k2, flatshading=True, opacity=0.6, color='#93C47D',
        lighting=dict(ambient=0.6, diffuse=0.7, specular=0.2), scene='scene2'
    )

    fig.add_trace(mesh1, row=1, col=1)
    fig.add_trace(mesh2, row=1, col=2)

    # One go.Frame per timestep carrying both meshes so one animation
    # control drives both subplots.
    frames = []
    for t in range(T):
        frames.append(go.Frame(
            name=str(t),
            data=[
                go.Mesh3d(x=verts1[t, :, 0], y=verts1[t, :, 1], z=verts1[t, :, 2],
                          i=i1, j=j1, k=k1, flatshading=True, opacity=0.6, color='#6FA8DC',
                          lighting=dict(ambient=0.6, diffuse=0.7, specular=0.2), scene='scene'),
                go.Mesh3d(x=verts2[t, :, 0], y=verts2[t, :, 1], z=verts2[t, :, 2],
                          i=i2, j=j2, k=k2, flatshading=True, opacity=0.6, color='#93C47D',
                          lighting=dict(ambient=0.6, diffuse=0.7, specular=0.2), scene='scene2')
            ]
        ))
    fig.frames = frames

    # Shared frame slider; labels only every 10th frame.
    sliders = [dict(
        active=0, yanchor="top", xanchor="left",
        currentvalue=dict(font=dict(size=12), prefix="Frame: ", visible=True, xanchor="right"),
        pad=dict(b=5, t=30), len=0.75, x=0.15, y=0.02,
        steps=[
            dict(args=[[str(t)], dict(frame=dict(duration=frame_duration, redraw=True), mode="immediate", transition=dict(duration=0))],
                 label=str(t) if t % 10 == 0 else "", method="animate")
            for t in range(T)
        ]
    )]

    def make_scene_config(bounds):
        # Fixed upper-body framing when bounds are available, else auto-fit.
        if bounds:
            return dict(
                aspectmode='manual', aspectratio=dict(x=1, y=1.2, z=1),
                xaxis=dict(visible=False, showbackground=False, range=bounds['x_range']),
                yaxis=dict(visible=False, showbackground=False, range=bounds['y_range']),
                zaxis=dict(visible=False, showbackground=False, range=bounds['z_range']),
                camera=dict(eye=dict(x=0, y=bounds['center'][1]*0.1, z=2.5),
                            center=dict(x=0, y=bounds['center'][1], z=0), up=dict(x=0, y=1, z=0)),
                bgcolor='rgba(250,250,250,1)'
            )
        return dict(aspectmode='data', xaxis=dict(visible=False), yaxis=dict(visible=False),
                    zaxis=dict(visible=False), camera=dict(eye=dict(x=0, y=0, z=2.5), up=dict(x=0, y=1, z=0)),
                    bgcolor='rgba(250,250,250,1)')

    fig.update_layout(
        scene=make_scene_config(bounds1),
        scene2=make_scene_config(bounds2),
        updatemenus=[dict(
            type="buttons", showactive=True,
            x=0.02, y=0.02, xanchor="left", yanchor="bottom",
            pad=dict(t=0, r=10), direction="right",
            buttons=[
                dict(label="Play", method="animate",
                     args=[None, {"frame": {"duration": frame_duration, "redraw": True}, "fromcurrent": True, "transition": {"duration": 0}}]),
                dict(label="Pause", method="animate",
                     args=[[None], {"frame": {"duration": 0, "redraw": False}, "mode": "immediate"}]),
                dict(label="Reset", method="animate",
                     args=[["0"], {"frame": {"duration": 0, "redraw": True}, "mode": "immediate"}])
            ]
        )],
        sliders=sliders,
        height=500,
        margin=dict(l=0, r=0, t=40, b=60),
        paper_bgcolor='rgba(250,250,250,1)'
    )

    return fig.to_html(
        include_plotlyjs='cdn', full_html=True,
        config={'displayModeBar': True, 'displaylogo': False, 'scrollZoom': True,
                'modeBarButtonsToRemove': ['lasso2d', 'select2d', 'toImage']}
    )
 
696
 
 
 
 
697
 
698
def create_placeholder_html() -> str:
    """Return the static HTML panel shown before any motion is generated."""
    placeholder = """
    <div style="display: flex; justify-content: center; align-items: center;
        height: 500px; background: #fafafa; border-radius: 4px; border: 1px solid #e0e0e0;">
        <p style="font-size: 14px; color: #888;">Enter a word to generate motion</p>
    </div>
    """
    return placeholder
705
+
706
+
707
def create_error_html(msg: str = "Error generating animation") -> str:
    """Return a static HTML panel displaying `msg` as an error message."""
    error_panel = f"""
    <div style="display: flex; justify-content: center; align-items: center;
        height: 500px; background: #fafafa; border-radius: 4px; border: 1px solid #e0e0e0;">
        <p style="font-size: 14px; color: #c00;">{msg}</p>
    </div>
    """
    return error_panel
714
+
715
 
716
def create_iframe_html(html_content: str, height: int = 530) -> str:
    """Wrap a full HTML document in a sandboxed iframe via `srcdoc`.

    The document is HTML-escaped so it can be embedded as an attribute
    value; the iframe allows scripts (needed for Plotly) and same-origin.
    """
    srcdoc = html_module.escape(html_content)
    return f'''
    <div style="width: 100%; height: {height}px; border: 1px solid #ddd; border-radius: 4px; overflow: hidden; background: #fafafa;">
        <iframe srcdoc="{srcdoc}" style="width: 100%; height: 100%; border: none;" sandbox="allow-scripts allow-same-origin"></iframe>
    </div>
    '''
723
 
724
  # =====================================================================
725
+ # Main Processing Functions
726
  # =====================================================================
727
def generate_verts_for_word(word: str, pid: str) -> tuple:
    """Generate SMPL-X vertices and faces for a word-PID pair.

    Returns (verts, faces, raw_token_string); verts/faces are None when the
    LLM output yields no token ids, the visualization models are missing,
    or decoding produces an empty parameter sequence.
    """
    raw_tokens = generate_motion_tokens(word, pid)
    token_ids = parse_motion_tokens(raw_tokens)

    if not token_ids:
        return None, None, raw_tokens

    decoders_ready = (
        _model_cache["vqvae_model"] is not None
        and _model_cache["smplx_model"] is not None
    )
    if not decoders_ready:
        return None, None, raw_tokens

    params = decode_tokens_to_params(token_ids)
    if params.shape[0] == 0:
        return None, None, raw_tokens

    verts, faces = params_to_vertices(params)
    return verts, faces, raw_tokens
744
+
745
 
746
def generate_animation_for_word(word: str, pid: str, upper_body_only: bool = True) -> tuple:
    """Generate animation HTML and tokens for a word. Returns (html, tokens)."""
    verts, faces, tokens = generate_verts_for_word(word, pid)
    if verts is None:
        # No decodable motion: show the neutral placeholder but keep tokens.
        return create_placeholder_html(), tokens
    html = create_animation_html(verts, faces, upper_body_only=upper_body_only, title=f"{pid}")
    return html, tokens
755
+
756
 
757
def process_word(word: str):
    """Main processing: generate side-by-side comparison for two random PIDs.

    Returns (iframe_html, token_text). Falls back to a single-avatar view
    when only one variant produces motion, and to an error panel when
    neither does or the word is unknown.
    """
    if not word or not word.strip():
        return create_iframe_html(create_placeholder_html()), ""

    word = word.strip().lower()
    pids = get_random_pids_for_word(word, 2)

    if not pids:
        return create_iframe_html(create_error_html(f"Word '{word}' not found in dataset")), ""
    if len(pids) == 1:
        # Only one variant available: compare it against itself.
        pids = [pids[0], pids[0]]

    try:
        verts1, faces1, tokens1 = generate_verts_for_word(word, pids[0])
        verts2, faces2, tokens2 = generate_verts_for_word(word, pids[1])

        if verts1 is None and verts2 is None:
            return create_iframe_html(create_error_html("Failed to generate motion")), tokens1 or tokens2

        # One-sided failure: show whichever variant succeeded on its own.
        if verts1 is None:
            solo = create_animation_html(verts2, faces2, upper_body_only=True, title=f"{pids[1]}")
            return create_iframe_html(solo), tokens2
        if verts2 is None:
            solo = create_animation_html(verts1, faces1, upper_body_only=True, title=f"{pids[0]}")
            return create_iframe_html(solo), tokens1

        comparison = create_side_by_side_html(verts1, faces1, verts2, faces2,
                                              title1=f"{pids[0]}", title2=f"{pids[1]}")
        combined_tokens = f"[{pids[0]}] {tokens1}\n\n[{pids[1]}] {tokens2}"
        return create_iframe_html(comparison), combined_tokens

    except Exception as e:
        return create_iframe_html(create_error_html(f"Error: {str(e)[:100]}")), ""
793
 
794
+
795
def get_example_animation(word: str, pid: str):
    """Return a cached example animation, generating it on a cache miss."""
    cache_key = f"{word}_{pid}"
    entry = _example_cache.get(cache_key)
    if entry is not None:
        return create_iframe_html(entry["html"]), entry["tokens"]
    # Not pre-computed (e.g. startup failure): generate on demand.
    html, tokens = generate_animation_for_word(word, pid, upper_body_only=True)
    return create_iframe_html(html), tokens
803
 
804
  # =====================================================================
805
+ # Gradio Interface
806
  # =====================================================================
807
def create_gradio_interface():
    """Build and return the Gradio Blocks UI.

    Layout: an input column (word textbox, generate button, generated-token
    readout, sample word list) next to the animation panel, followed by one
    row per pre-computed example with a Load button.
    """
    default_html = create_iframe_html(create_placeholder_html())

    custom_css = """
    .gradio-container { max-width: 1400px !important; }
    .example-row { margin-top: 15px; padding: 12px; background: #f8f9fa; border-radius: 6px; }
    """

    # Snapshot of whatever precompute_examples() managed to cache.
    example_list = list(_example_cache.values()) if _example_cache else []

    with gr.Blocks(title="SignMotionGPT", css=custom_css, theme=gr.themes.Default()) as demo:

        gr.Markdown("# SignMotionGPT Demo")
        gr.Markdown("Text-to-Sign Language Motion Generation with Variant Comparison")

        with gr.Row():
            with gr.Column(scale=1, min_width=280):
                gr.Markdown("### Input")

                word_input = gr.Textbox(
                    label="Word",
                    placeholder="Enter a word from the dataset...",
                    lines=1, max_lines=1
                )

                generate_btn = gr.Button("Generate Motion", variant="primary", size="lg")

                gr.Markdown("---")
                gr.Markdown("### Generated Tokens")

                tokens_output = gr.Textbox(
                    label="Motion Tokens (both variants)",
                    lines=8,
                    interactive=False,
                    show_copy_button=True
                )

                # Hint at valid inputs using the first few dataset words.
                if _word_pid_map:
                    sample_words = list(_word_pid_map.keys())[:10]
                    gr.Markdown(f"**Available words:** {', '.join(sample_words)}, ...")

            with gr.Column(scale=2, min_width=700):
                gr.Markdown("### Motion Comparison (Two Signer Variants)")
                animation_output = gr.HTML(value=default_html, elem_id="animation-container")

        if example_list:
            gr.Markdown("---")
            gr.Markdown("### Pre-computed Examples")

            for item in example_list:
                word, pid = item['word'], item['pid']
                with gr.Row(elem_classes="example-row"):
                    with gr.Column(scale=1, min_width=120):
                        gr.Markdown(f"**{word.capitalize()}**")
                        gr.Markdown(f"Variant: {pid}")
                        example_btn = gr.Button(f"Load", size="sm")

                    with gr.Column(scale=3, min_width=500):
                        example_html = gr.HTML(
                            value=create_iframe_html(create_placeholder_html(), height=450),
                            elem_id=f"example-{word}"
                        )

                # Default-argument lambda binds this row's word/pid eagerly,
                # avoiding the late-binding closure pitfall inside the loop.
                example_btn.click(
                    fn=lambda w=word, p=pid: get_example_animation(w, p),
                    inputs=[],
                    outputs=[example_html, tokens_output]
                )

        gr.Markdown("---")
        gr.Markdown("*SignMotionGPT: LLM-based sign language motion generation*")

        # Button click and textbox Enter both trigger generation.
        generate_btn.click(
            fn=process_word,
            inputs=[word_input],
            outputs=[animation_output, tokens_output]
        )

        word_input.submit(
            fn=process_word,
            inputs=[word_input],
            outputs=[animation_output, tokens_output]
        )

    return demo
893
 
894
+ # =====================================================================
895
+ # Main Entry Point for HuggingFace Spaces
896
+ # =====================================================================
897
+ print("\n" + "="*60)
898
+ print(" SignMotionGPT - HuggingFace Spaces")
899
+ print("="*60)
900
+ print(f"Device: {DEVICE}")
901
+ print(f"Model: {HF_REPO_ID}/{HF_SUBFOLDER}")
902
+ print(f"Data Directory: {DATA_DIR}")
903
+ print(f"Dataset: {DATASET_PATH}")
904
+ print("="*60 + "\n")
905
+
906
+ # Initialize models at startup
907
+ initialize_models()
908
+
909
+ # Pre-compute example animations
910
+ precompute_examples()
911
 
912
+ # Create and launch interface
913
+ demo = create_gradio_interface()
914
+
915
+ if __name__ == "__main__":
916
+ demo.launch()