Spaces:

rdz-falcon
/

SignMotionGPT

Running

App Files Files Community

rdz-falcon committed on Dec 7, 2025

Commit

2d54a11

verified ·

1 Parent(s): c586336

Update app.py

Browse files

Files changed (1) hide show

app.py +202 -122

app.py CHANGED Viewed

@@ -6,10 +6,10 @@ import warnings
 import re
 import json
 import random
-import base64
 from pathlib import Path
-# Add root to path to allow imports from project root
 current_dir = os.path.dirname(os.path.abspath(__file__))
 parent_dir = os.path.dirname(current_dir)
 sys.path.append(current_dir)
@@ -18,12 +18,18 @@ sys.path.append(parent_dir)
 # Import project modules
 try:
     from visualize import visualize
 except Exception as e:
     print(f"Error importing project modules: {e}")
-    # Fallback/Dummy visualize for testing if module is missing
-    def visualize(**kwargs):
-        print("Visualizer called (Dummy)")
-        return None
 # Constants
 HF_REPO_ID = "rdz-falcon/SignMotionGPTfit-archive"
@@ -31,7 +37,7 @@ EPOCH_SUBFOLDER = "stage2_v2/epoch-030"
 CODEBOOK_SIZE = 512
 DATASET_PATH = os.environ.get("DATASET_PATH", "enriched_dataset.json")
-# Inference Params
 INFERENCE_TEMPERATURE = 0.7
 INFERENCE_TOP_K = 50
 INFERENCE_REPETITION_PENALTY = 1.2
@@ -41,18 +47,24 @@ M_END = "<M_END>"
 # Global model cache
 MODEL = None
 TOKENIZER = None
 M_START_ID = None
 M_END_ID = None
 VARIANT_MAP = {}
 def load_variant_map():
     global VARIANT_MAP
     candidates = [
         DATASET_PATH,
         os.path.join(os.path.dirname(__file__), DATASET_PATH),
-        "data/motion_llm_dataset.json",
         "motion_llm_dataset.json"
     ]
     found_path = None
     for p in candidates:
         if os.path.exists(p):
@@ -64,63 +76,136 @@ def load_variant_map():
         try:
             with open(found_path, 'r', encoding='utf-8') as f:
                 data = json.load(f)
             mapping = {}
             for entry in data:
                 word = entry.get("word") or entry.get("text_query")
                 if not word: continue
                 word = word.lower().strip()
                 pid = entry.get("participant_id")
                 if word and pid:
-                    mapping.setdefault(word, []).append(str(pid))
             VARIANT_MAP = mapping
-            print(f"Loaded variants for {len(VARIANT_MAP)} words.")
         except Exception as e:
             print(f"Error loading dataset: {e}")
     else:
-        print("⚠️ Dataset not found. Variants will default to 'unknown'.")
-        # Fallbacks
-        VARIANT_MAP["push"] = ["P40", "P123"]
-        VARIANT_MAP["send"] = ["P40"]
 def init_model():
     global MODEL, TOKENIZER, M_START_ID, M_END_ID
     if MODEL is not None:
         return
     load_variant_map()
-    from transformers import AutoModelForCausalLM, AutoTokenizer
-    token = os.environ.get("HF_TOKEN")
-    print(f"Loading model from HF: {HF_REPO_ID}/{EPOCH_SUBFOLDER}")
-    TOKENIZER = AutoTokenizer.from_pretrained(HF_REPO_ID, subfolder=EPOCH_SUBFOLDER, token=token, trust_remote_code=True)
-    MODEL = AutoModelForCausalLM.from_pretrained(HF_REPO_ID, subfolder=EPOCH_SUBFOLDER, token=token, trust_remote_code=True)
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     MODEL.to(device)
     MODEL.eval()
-    # Add special tokens if missing
-    if M_START not in TOKENIZER.get_vocab():
-        TOKENIZER.add_special_tokens({"additional_special_tokens": [M_START, M_END]})
-        MODEL.resize_token_embeddings(len(TOKENIZER))
     M_START_ID = TOKENIZER.convert_tokens_to_ids(M_START)
     M_END_ID = TOKENIZER.convert_tokens_to_ids(M_END)
-    if "<motion_0>" not in TOKENIZER.get_vocab():
-        print("Adding motion tokens...")
         motion_tokens = [f"<motion_{i}>" for i in range(CODEBOOK_SIZE)]
-        TOKENIZER.add_tokens(motion_tokens, special_tokens=True)
-        MODEL.resize_token_embeddings(len(TOKENIZER))
 def generate_motion_simple(model, tokenizer, prompt_text, device):
     word_lower = prompt_text.lower().strip()
-    variants = VARIANT_MAP.get(word_lower, ["unknown"])
-    pid = random.choice(variants)
     prompt = f"Instruction: Generate motion for word '{prompt_text}' with variant '{pid}'.\nMotion: "
-    print(f"Input Prompt: {prompt}")
     inputs = tokenizer(prompt, return_tensors="pt").to(device)
@@ -133,51 +218,94 @@ def generate_motion_simple(model, tokenizer, prompt_text, device):
             top_k=INFERENCE_TOP_K,
             repetition_penalty=INFERENCE_REPETITION_PENALTY,
             pad_token_id=tokenizer.pad_token_id,
-            eos_token_id=M_END_ID,
             early_stopping=True
         )
     decoded = tokenizer.decode(output[0], skip_special_tokens=False)
-    return decoded.split("Motion: ")[-1].strip() if "Motion: " in decoded else decoded.strip()
 def generate_motion_app(text_prompt):
-    # Returns: (iframe_html, file_path, status_text)
     if not text_prompt:
-        return None, None, "Please enter a prompt."
     if MODEL is None:
         try:
             init_model()
         except Exception as e:
-            return None, None, f"Model Init Error: {e}"
     print(f"Generating for: {text_prompt}")
     try:
-        # 1. Generate Tokens
-        generated_sequence = generate_motion_simple(MODEL, TOKENIZER, text_prompt, MODEL.device)
-        # Clean tokens
         m_tokens = re.findall(r'<M(\d+)>', generated_sequence)
         if not m_tokens:
             m_tokens = re.findall(r'<motion_(\d+)>', generated_sequence)
-        tokens_for_vis = " ".join(m_tokens) if m_tokens else generated_sequence
-        # 2. Visualization Paths
         data_dir = os.environ.get("DATA_DIR", "data")
         vqvae_ckpt = os.path.join(data_dir, "vqvae_model.pt")
         stats_path = os.path.join(data_dir, "vqvae_stats.pt")
         smplx_dir = os.path.join(data_dir, "smplx_models")
-        # Check files
-        if not os.path.exists(vqvae_ckpt):
-            return None, None, f"Missing VQ-VAE model at {vqvae_ckpt}"
-        output_html = f"motion_{text_prompt.replace(' ', '_')}.html"
-        # 3. Create Visualization (Saves HTML to disk)
-        visualize(
             tokens=tokens_for_vis,
             vqvae_ckpt=vqvae_ckpt,
             stats_path=stats_path,
@@ -187,83 +315,35 @@ def generate_motion_app(text_prompt):
             fps=20
         )
-        # 4. Prepare Outputs
-        if not os.path.exists(output_html):
-            return None, None, "Error: HTML file was not generated."
-        # A) Prepare Iframe for Preview (Base64 encoding)
-        with open(output_html, "rb") as f:
-            encoded_html = base64.b64encode(f.read()).decode('utf-8')
-        iframe = f"""<iframe
-            src="data:text/html;base64,{encoded_html}"
-            width="100%"
-            height="600px"
-            style="border:none;">
-        </iframe>"""
-        # B) Prepare Status Message
-        status_msg = f"✅ Success! Generated {len(m_tokens)} tokens.\nSequence: {tokens_for_vis[:50]}..."
-        # Return: (HTML Preview, File Path for Download, Status)
-        return iframe, output_html, status_msg
     except Exception as e:
-        import traceback
-        traceback.print_exc()
-        return None, None, f"Error: {str(e)}"
-# --- Gradio UI ---
-custom_css = """
-.gradio-container { max-width: 1400px !important; }
-.viz-section { min-height: 700px; }
-"""
-with gr.Blocks(css=custom_css, title="SignMotionGPT Demo") as demo:
-    gr.Markdown("# 🤟 SignMotionGPT Demo")
-    with gr.Row():
-        # INPUT COLUMN
-        with gr.Column(scale=1):
-            text_input = gr.Textbox(label="Enter Word", placeholder="e.g., push")
-            with gr.Row():
-                clear_btn = gr.Button("Clear")
-                submit_btn = gr.Button("Generate Motion", variant="primary")
-            status_output = gr.Textbox(label="Status", lines=5, interactive=False)
-            # DOWNLOAD BUTTON (New!)
-            gr.Markdown("### 📥 Download Result")
-            file_output = gr.File(label="Download HTML Animation")
-        # PREVIEW COLUMN
-        with gr.Column(scale=3, elem_classes="viz-section"):
-            gr.Markdown("### 🎭 Preview")
-            plot_output = gr.HTML(label="Avatar Motion")
-    # Examples
-    gr.Markdown("### Examples")
-    with gr.Row():
-        for word in ["push", "send", "library", "passport"]:
-            gr.Button(word).click(
-                fn=lambda w=word: w, outputs=text_input
-            ).then(
-                fn=generate_motion_app,
-                inputs=text_input,
-                outputs=[plot_output, file_output, status_output]
-            )
-    # Main Events
-    submit_btn.click(
-        fn=generate_motion_app,
-        inputs=[text_input],
-        outputs=[plot_output, file_output, status_output] # 3 Outputs
-    )
-    clear_btn.click(
-        fn=lambda: ("", None, None, ""),
-        outputs=[text_input, plot_output, file_output, status_output]
-    )
 if __name__ == "__main__":
-    demo.launch()

 import re
 import json
 import random
 from pathlib import Path
+# Add root to path to allow imports from project root when running from demo-code/
+# or when running from root
 current_dir = os.path.dirname(os.path.abspath(__file__))
 parent_dir = os.path.dirname(current_dir)
 sys.path.append(current_dir)
 # Import project modules
 try:
     from visualize import visualize
+    # Try importing what we can, but we will implement generation logic directly here
+    # to match test_overfit.py / metrics.py exactly and avoid dependency issues.
+    # We catch Exception because unsloth in model.py might raise NotImplementedError on CPU
+    from model import get_motion_token_info
 except Exception as e:
     print(f"Error importing project modules: {e}")
+    print("Make sure you are running this from the project root or have the project structure intact.")
+    # Fallback for explicit relative imports if needed in some environments
+    try:
+        from visualize import visualize
+    except Exception as vis_e:
+        print(f"Visualize import failed too: {vis_e}")
 # Constants
 HF_REPO_ID = "rdz-falcon/SignMotionGPTfit-archive"
 CODEBOOK_SIZE = 512
 DATASET_PATH = os.environ.get("DATASET_PATH", "enriched_dataset.json")
+# Hardcoded Config from test_overfit.py / config.py
 INFERENCE_TEMPERATURE = 0.7
 INFERENCE_TOP_K = 50
 INFERENCE_REPETITION_PENALTY = 1.2
 # Global model cache
 MODEL = None
 TOKENIZER = None
+# We use M_START/M_END as in test_overfit.py
 M_START_ID = None
 M_END_ID = None
 VARIANT_MAP = {}
 def load_variant_map():
+    """Load dataset to map words to valid participant IDs."""
     global VARIANT_MAP
+    # Try multiple possible paths for the dataset
     candidates = [
         DATASET_PATH,
         os.path.join(os.path.dirname(__file__), DATASET_PATH),
+        os.path.join(os.path.dirname(__file__), "..", DATASET_PATH),
+        "data/motion_llm_dataset.json", # Fallback to raw dataset if enriched missing
         "motion_llm_dataset.json"
     ]
     found_path = None
     for p in candidates:
         if os.path.exists(p):
         try:
             with open(found_path, 'r', encoding='utf-8') as f:
                 data = json.load(f)
             mapping = {}
+            count = 0
             for entry in data:
+                # Support both formats (enriched or raw)
                 word = entry.get("word") or entry.get("text_query")
                 if not word: continue
+                # Clean word (sometimes text_query is "Motion for word 'hello'")
+                if "motion for word" in word.lower():
+                    # extraction heuristic if needed, but 'word' field is preferred
+                    pass
                 word = word.lower().strip()
                 pid = entry.get("participant_id")
                 if word and pid:
+                    if word not in mapping:
+                        mapping[word] = []
+                    if pid not in mapping[word]:
+                        mapping[word].append(str(pid))
+                        count += 1
             VARIANT_MAP = mapping
+            print(f"Loaded {count} variants for {len(VARIANT_MAP)} words.")
+            # Debug check for 'push'
+            if 'push' in VARIANT_MAP:
+                print(f"  'push' variants: {VARIANT_MAP['push']}")
+            else:
+                print("  'push' NOT found in dataset.")
         except Exception as e:
             print(f"Error loading dataset: {e}")
     else:
+        print(f"⚠️ Dataset not found. Tried: {candidates}. Variants will default to 'unknown'.")
+    # Hardcoded fallback for demonstration words if missing from dataset
+    defaults = {
+        "push": ["P40", "P123", "P1"],
+        "send": ["P40", "P123"],
+        "library": ["P40"],
+        "passport": ["P40"]
+    }
+    for w, pids in defaults.items():
+        if w not in VARIANT_MAP:
+            VARIANT_MAP[w] = pids
+            print(f"  Added fallback variants for '{w}': {pids}")
+def load_model_from_hf(repo_id, subfolder, token=None):
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+    print(f"Loading model from HF: {repo_id}/{subfolder}")
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(repo_id, subfolder=subfolder, token=token, trust_remote_code=True)
+        model = AutoModelForCausalLM.from_pretrained(repo_id, subfolder=subfolder, token=token, trust_remote_code=True)
+        return model, tokenizer
+    except Exception as e:
+        print(f"Error loading model: {e}")
+        return None, None
 def init_model():
     global MODEL, TOKENIZER, M_START_ID, M_END_ID
     if MODEL is not None:
         return
     load_variant_map()
+    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
+    # Load model/tokenizer
+    MODEL, TOKENIZER = load_model_from_hf(HF_REPO_ID, EPOCH_SUBFOLDER, token)
+    if MODEL is None:
+        raise RuntimeError(f"Failed to load model from {HF_REPO_ID}/{EPOCH_SUBFOLDER}")
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     MODEL.to(device)
     MODEL.eval()
+    # Setup special tokens matching test_overfit.py
+    # test_overfit.py uses M_START="<M_START>" and M_END="<M_END>"
+    # Check if tokens exist
+    if M_START not in TOKENIZER.get_vocab() or M_END not in TOKENIZER.get_vocab():
+        print(f"⚠️  Warning: {M_START} or {M_END} not found in tokenizer. Adding them now...")
+        num_added = TOKENIZER.add_special_tokens({"additional_special_tokens": [M_START, M_END]})
+        if num_added > 0:
+            MODEL.resize_token_embeddings(len(TOKENIZER))
+            print(f"   Added {num_added} special tokens.")
     M_START_ID = TOKENIZER.convert_tokens_to_ids(M_START)
     M_END_ID = TOKENIZER.convert_tokens_to_ids(M_END)
+    # Check motion tokens
+    # We expect <motion_0> ... <motion_511>
+    # If missing, add them
+    first_motion = "<motion_0>"
+    if first_motion not in TOKENIZER.get_vocab():
+        print("⚠️  Warning: Motion tokens not found. Adding them now...")
         motion_tokens = [f"<motion_{i}>" for i in range(CODEBOOK_SIZE)]
+        num_added = TOKENIZER.add_tokens(motion_tokens, special_tokens=True)
+        if num_added > 0:
+            MODEL.resize_token_embeddings(len(TOKENIZER))
+            print(f"   Added {num_added} motion tokens.")
+    print(f"Model initialized. Vocab size: {len(TOKENIZER)}")
+    print(f"M_START_ID: {M_START_ID}, M_END_ID: {M_END_ID}")
 def generate_motion_simple(model, tokenizer, prompt_text, device):
+    """
+    Replicates the simple generation logic from metrics.py / test_overfit.py
+    """
+    # Construct prompt exactly as in test_overfit.py:
+    # prompt = f"Instruction: Generate motion for word '{sample['word']}' with variant '{sample['participant_id']}'.\nMotion: "
+    # Get a valid participant ID if possible
     word_lower = prompt_text.lower().strip()
+    variants = VARIANT_MAP.get(word_lower, [])
+    if variants:
+        pid = random.choice(variants)
+        print(f"Selected variant '{pid}' for word '{prompt_text}'")
+    else:
+        # Fallback to 'unknown' or a common PID if known (e.g., P1)
+        pid = "unknown"
+        print(f"No variants found for '{prompt_text}', using '{pid}'")
     prompt = f"Instruction: Generate motion for word '{prompt_text}' with variant '{pid}'.\nMotion: "
+    print(f"Input Prompt:\n{prompt}")
     inputs = tokenizer(prompt, return_tensors="pt").to(device)
             top_k=INFERENCE_TOP_K,
             repetition_penalty=INFERENCE_REPETITION_PENALTY,
             pad_token_id=tokenizer.pad_token_id,
+            eos_token_id=M_END_ID, # Stop at <M_END>
             early_stopping=True
         )
     decoded = tokenizer.decode(output[0], skip_special_tokens=False)
+    # Parse output to extract just the motion part
+    # We expect: ... \nMotion: <M_START> <motion_...> ... <M_END>
+    if "Motion: " in decoded:
+        motion_part = decoded.split("Motion: ")[-1]
+    else:
+        motion_part = decoded
+    return motion_part.strip()
 def generate_motion_app(text_prompt):
     if not text_prompt:
+        return None, "Please enter a prompt."
     if MODEL is None:
         try:
             init_model()
         except Exception as e:
+            return None, f"Model Initialization Failed: {e}"
+    device = MODEL.device
     print(f"Generating for: {text_prompt}")
     try:
+        generated_sequence = generate_motion_simple(MODEL, TOKENIZER, text_prompt, device)
+        print("Generated sequence (raw):", generated_sequence)
+        # Extract tokens for visualization
+        # Logic from metrics.py: _extract_motion_tokens_from_sequence
+        # Expect tokens like <M123> or <motion_123>
+        # The generation might include M_START/M_END.
+        # Clean up for visualization input
+        # We need a string of tokens.
+        # If the output is like "<M_START> <motion_1> <motion_2> <M_END>", we pass that.
+        # visualize.py's parse_motion_tokens handles <motion_ID> regex.
+        # BUT visualize.py expects either "123 456" OR "<motion_123> <motion_456>"
+        # It does NOT explicitly handle <M123> which is what we might have here if M_START was used.
+        # Let's convert <M123> to space-separated integers for safety.
+        # Extract integers from <M123> or <motion_123>
+        # generated_sequence is raw string from tokenizer decode
+        import re
+        # Try <M123> format (test_overfit style)
         m_tokens = re.findall(r'<M(\d+)>', generated_sequence)
         if not m_tokens:
+            # Try <motion_123> format
             m_tokens = re.findall(r'<motion_(\d+)>', generated_sequence)
+        if m_tokens:
+            # Reconstruct as space-separated string for visualize.py
+            tokens_for_vis = " ".join(m_tokens)
+        else:
+            # Fallback to raw string if regex failed (visualize.py might handle other formats)
+            tokens_for_vis = generated_sequence
+        print(f"Tokens for visualization: {tokens_for_vis[:50]}...")
+    except Exception as e:
+        return None, f"Generation Error: {e}"
+    # Visualization
+    try:
+        # Ensure paths for VQ-VAE and SMPL-X
         data_dir = os.environ.get("DATA_DIR", "data")
         vqvae_ckpt = os.path.join(data_dir, "vqvae_model.pt")
         stats_path = os.path.join(data_dir, "vqvae_stats.pt")
         smplx_dir = os.path.join(data_dir, "smplx_models")
+        # Check existence
+        missing = []
+        if not os.path.exists(vqvae_ckpt): missing.append(vqvae_ckpt)
+        if not os.path.exists(stats_path): missing.append(stats_path)
+        if not os.path.exists(smplx_dir): missing.append(smplx_dir)
+        if missing:
+            return None, f"Missing visualization files in {data_dir}: {missing}. Please ensure they are uploaded to the Space."
+        # Output to a temporary file
+        output_html = "temp_viz.html"
+        fig = visualize(
             tokens=tokens_for_vis,
             vqvae_ckpt=vqvae_ckpt,
             stats_path=stats_path,
             fps=20
         )
+        if fig is None:
+             return None, "Visualization failed (no frames produced)."
+        # Count tokens for display
+        matches = re.findall(r'<motion_(\d+)>', tokens_for_vis)
+        # Also check for <M...> format just in case
+        if not matches:
+             matches = re.findall(r'<M(\d+)>', tokens_for_vis)
+        num_tokens = len(matches)
+        return fig, f"Success! Generated tokens length: {num_tokens}. Sequence: {tokens_for_vis[:100]}..."
     except Exception as e:
+        return None, f"Visualization Error: {e}"
+# Gradio UI
+with gr.Interface(
+    fn=generate_motion_app,
+    inputs=gr.Textbox(label="Enter Motion Prompt", placeholder="e.g. walking forward"),
+    outputs=[
+        gr.Plot(label="Motion Visualization"),
+        gr.Textbox(label="Status/Output")
+    ],
+    title="SignMotionGPT Demo",
+    description="Generate Sign Language/Motion Avatars from Text. Using model checkpoint: epoch 30."
+) as demo:
+    pass
 if __name__ == "__main__":
+    demo.launch()