Update app.py
app.py (CHANGED)
@@ -1,91 +1,109 @@
Old version (lines prefixed with "-" were removed or rewritten; several of them are truncated in this view):

import os
import json
import gradio as gr
from gradio_client import Client, handle_file
from huggingface_hub import hf_hub_download, list_repo_files

-# 1.
HF_TOKEN = os.environ.get("HF_TOKEN")
PRIVATE_SPACE = "st192011/ASL-VLS-Private"

try:
    api_client = Client(PRIVATE_SPACE, hf_token=HF_TOKEN)
-except:
    api_client = None

-# 2.
-
all_files = list_repo_files(repo_id="Voxel51/WLASL", repo_type="dataset")
data_0_mp4s = [f for f in all_files if f.startswith("data/data_0/") and f.endswith(".mp4")]
-
-# Create a clean display name map (using ID as key for simplicity in demo)
dataset_choices = {os.path.basename(f): f for f in data_0_mp4s}

-#
def update_video_display(selection):
-    """Downloads the
    if not selection: return None
    hf_path = dataset_choices[selection]
-
    return local_path

-def
-
-    if not

    try:
-        #
        result = api_client.predict(
            video_file=handle_file(video),
            api_name="/predict"
        )
        return result
-    except:
-        return {"Neural Engine

-#
-with gr.Blocks(theme="
-    gr.Markdown("""
    # 🧠 OmniSign VLM
-    ### **

-
-
-

-
-
-    - **Instant Lexical Scaling:** Vocabulary can be updated in seconds without retraining.
-    - **Temporal Precision:** Deep analysis of high-density motion trajectories.
    """)

    with gr.Row():
        with gr.Column():
-            gr.Markdown("### 📦 1. Input
-

-
-
-
-
-            )

-

        with gr.Column():
-            gr.Markdown("### 📊 2.
-            output_label = gr.Label(num_top_classes=3, label="

-            gr.
-
-
-            """)

-    # Event
    dataset_drop.change(fn=update_video_display, inputs=dataset_drop, outputs=video_display)
-
-    # Event: When button clicked, analyze the video currently in the player
-    run_btn.click(fn=run_omnisign, inputs=video_display, outputs=output_label)

if __name__ == "__main__":
    demo.launch()

New version (lines prefixed with "+" were added):

import os
+import shutil
import json
import gradio as gr
from gradio_client import Client, handle_file
from huggingface_hub import hf_hub_download, list_repo_files

+# 1. SECRETS & BACKEND LINK
HF_TOKEN = os.environ.get("HF_TOKEN")
PRIVATE_SPACE = "st192011/ASL-VLS-Private"

try:
    api_client = Client(PRIVATE_SPACE, hf_token=HF_TOKEN)
+except Exception as e:
+    print(f"Connection Error: {e}")
    api_client = None

+# 2. LOAD SUPPORTED GLOSSARY
+# We load the JSON just to get the list of words for the UI
+KB_FILE = "asl_rag_knowledge_base.json"
+supported_glosses = []
+if os.path.exists(KB_FILE):
+    with open(KB_FILE, 'r') as f:
+        kb_data = json.load(f)
+    supported_glosses = sorted(list(set([item['gloss'].upper() for item in kb_data])))

+# 3. DATASET DISCOVERY
+print("Syncing with WLASL Archive...")
all_files = list_repo_files(repo_id="Voxel51/WLASL", repo_type="dataset")
+# Filter for data_0 videos only
data_0_mp4s = [f for f in all_files if f.startswith("data/data_0/") and f.endswith(".mp4")]
dataset_choices = {os.path.basename(f): f for f in data_0_mp4s}

+# 4. LOGIC FUNCTIONS
def update_video_display(selection):
+    """Downloads the file and moves it to a Gradio-accessible directory"""
    if not selection: return None

    hf_path = dataset_choices[selection]
+    # Download from HF cache
+    cache_path = hf_hub_download(repo_id="Voxel51/WLASL", filename=hf_path, repo_type="dataset")

+    # FIX: Copy to /tmp to bypass Gradio InvalidPathError
+    local_path = os.path.join("/tmp", selection)
+    shutil.copy(cache_path, local_path)

    return local_path

+def run_omnisign_analysis(video):
+    """Sends the active video (sample or user-recorded) to the private VLM engine"""
+    if not video: return {"Error: No Video Detected": 0.0}
+    if not api_client: return {"Error: Neural Backend Offline": 0.0}

    try:
+        # Use handle_file to safely stream the video to the private space
        result = api_client.predict(
            video_file=handle_file(video),
            api_name="/predict"
        )
        return result
+    except Exception as e:
+        return {f"Neural Engine Error: {str(e)}": 0.0}

+# 5. UI DESIGN (PITCH FORMAT)
+with gr.Blocks(theme="glass") as demo:
+    gr.Markdown(f"""
    # 🧠 OmniSign VLM
+    ### **Universal Neural Sign Language Protocol**
+
+    This demonstration introduces a revolutionary **VLM-based architecture** for sign language interpretation.
+    Unlike traditional models that are prone to overfitting, the OmniSign protocol leverages **Temporal Neural Transduction** to generalize across all environments and signers instantly.

+    **Proprietary Core Advantages:**
+    * **Universal Generalization:** Robust performance in any environment, lighting, or camera angle.
+    * **Instant Lexical Scaling:** The protocol allows for adding any new sign language word instantly without retraining.
+    * **Person-Agnostic Reasoning:** The system analyzes movement logic rather than memorizing specific signers.

+    ---
+    *Notice: This is a structural demonstration. The current engine is non-optimized and operates on a limited vocabulary subset.*
    """)

    with gr.Row():
        with gr.Column():
+            gr.Markdown("### 📦 1. Input Interface")
+            # This player handles BOTH uploads and the samples from the dropdown
+            video_display = gr.Video(label="Neural Input Buffer")

+            dataset_drop = gr.Dropdown(
+                choices=[""] + sorted(list(dataset_choices.keys())),
+                label="Explore WLASL data_0 Samples"
+            )

+            gr.Markdown("*Tip: Select a sample above to watch it, then sign it yourself or analyze the sample.*")
+            run_btn = gr.Button("🚀 Execute Neural Analysis", variant="primary")

        with gr.Column():
+            gr.Markdown("### 📊 2. VLM Perception Output")
+            output_label = gr.Label(num_top_classes=3, label="Neural Confidence Score")

+            with gr.Accordion("📖 Supported Glossary", open=True):
+                gr.Markdown(f"**The system currently recognizes {len(supported_glosses)} signs:**")
+                gr.Markdown(", ".join(supported_glosses))

+    # Event Mapping
    dataset_drop.change(fn=update_video_display, inputs=dataset_drop, outputs=video_display)
+    run_btn.click(fn=run_omnisign_analysis, inputs=video_display, outputs=output_label)

if __name__ == "__main__":
    demo.launch()
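
The new glossary loader only touches the "gloss" field of each entry in asl_rag_knowledge_base.json; the file itself is not part of this commit. A minimal sketch of the shape the loader assumes (the "description" field is a hypothetical extra, not confirmed by the code):

# Hypothetical sketch of asl_rag_knowledge_base.json as a list of entries,
# each carrying at least the "gloss" key that app.py reads at startup.
import json

sample_kb = [
    {"gloss": "book", "description": "Palms open outward like opening a book."},
    {"gloss": "drink", "description": "Cupped hand tips toward the mouth."},
    {"gloss": "book"},  # duplicates are fine: the loader de-duplicates with set()
]

with open("asl_rag_knowledge_base.json", "w") as f:
    json.dump(sample_kb, f, indent=2)

# With this sample, supported_glosses would come out as ["BOOK", "DRINK"].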
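
The shutil.copy into /tmp inside update_video_display works around Gradio refusing to serve files outside its allowed directories (the InvalidPathError named in the comment). An alternative the commit does not use is to whitelist the Hub cache via allowed_paths at launch time, so the hf_hub_download path could be returned directly; a minimal sketch, assuming the default huggingface_hub cache location:

# Sketch of the allowed_paths alternative; the cache location below is an
# assumption (the default huggingface_hub cache), not taken from the commit.
import os
import gradio as gr

HF_HUB_CACHE = os.path.expanduser("~/.cache/huggingface/hub")

with gr.Blocks() as alt_demo:
    gr.Video(label="Neural Input Buffer")

if __name__ == "__main__":
    # allowed_paths lets Gradio serve files from directories it would
    # otherwise reject, removing the need to copy samples into /tmp.
    alt_demo.launch(allowed_paths=[HF_HUB_CACHE])

The copy-to-/tmp approach used in the commit is simpler and keeps the launch call untouched, at the cost of one extra file copy per selected sample.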
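
run_omnisign_analysis assumes the private Space st192011/ASL-VLS-Private exposes a /predict endpoint that takes a single video input named video_file and returns a mapping of sign labels to confidences, which is what gr.Label renders. That backend is not included in this commit; a minimal, interface-compatible sketch with a placeholder classifier:

# Hypothetical backend for the private Space; the real model is not shown in
# this repository, so the prediction below is a hard-coded placeholder.
import gradio as gr

def predict(video_file):
    # A real implementation would run sign recognition on video_file here.
    # gr.Label (and gradio_client callers) expect {label: confidence}.
    return {"BOOK": 0.72, "DRINK": 0.18, "HELLO": 0.10}

backend = gr.Interface(
    fn=predict,
    inputs=gr.Video(label="video_file"),
    outputs=gr.Label(num_top_classes=3),
)

if __name__ == "__main__":
    backend.launch()

With a backend shaped like this, Client(PRIVATE_SPACE, hf_token=HF_TOKEN).predict(video_file=handle_file(path), api_name="/predict") returns the dict that the public demo forwards straight to output_label.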