Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import json
import gradio as gr
from gradio_client import Client, handle_file
from huggingface_hub import hf_hub_download, list_repo_files

# 1. CONFIG & PRIVATE LINK
# HF_TOKEN must be provided as a Space secret; it authorizes calls to the
# private inference Space below.
HF_TOKEN = os.environ.get("HF_TOKEN")
PRIVATE_SPACE = "st192011/ASL-VLS-Private"

# Connect to the private backend Space at startup. If the connection fails
# (bad/missing token, Space asleep, network error), keep the UI alive and
# let run_omnisign() report "Engine Offline" instead of crashing on import.
try:
    api_client = Client(PRIVATE_SPACE, hf_token=HF_TOKEN)
except Exception as e:  # was a bare `except:` — don't mask SystemExit/KeyboardInterrupt
    print(f"Warning: could not connect to {PRIVATE_SPACE}: {e}")
    api_client = None
# 2. DATASET PREP
print("Fetching dataset library...")

# Pull the dataset's full file index, then keep only the MP4 clips that
# live under data/data_0/.
all_files = list_repo_files(repo_id="Voxel51/WLASL", repo_type="dataset")
data_0_mp4s = [
    repo_path
    for repo_path in all_files
    if repo_path.startswith("data/data_0/") and repo_path.endswith(".mp4")
]

# Map the bare filename (readable in a dropdown) to the full repo path
# (needed later for hf_hub_download).
dataset_choices = {os.path.basename(repo_path): repo_path for repo_path in data_0_mp4s}
# 3. UI LOGIC
def update_video_display(selection):
    """Resolve a dropdown selection to a playable local video path.

    Args:
        selection: Display name chosen in the dropdown (a key of
            ``dataset_choices``), or ""/None when nothing is selected.

    Returns:
        Local filesystem path of the downloaded clip, or None when the
        selection is empty or unknown.
    """
    if not selection:
        return None
    # .get() instead of [] so a stale or unknown choice clears the player
    # rather than raising KeyError inside the Gradio event handler.
    hf_path = dataset_choices.get(selection)
    if hf_path is None:
        return None
    # hf_hub_download caches locally, so repeat selections don't re-download.
    local_path = hf_hub_download(repo_id="Voxel51/WLASL", filename=hf_path, repo_type="dataset")
    return local_path
def run_omnisign(video):
    """Send the buffered video to the private inference Space.

    Args:
        video: Local path of the video currently in the player, or
            None/"" when the buffer is empty.

    Returns:
        A label-to-confidence dict suitable for gr.Label: either the
        backend's prediction or a single error entry.
    """
    if not video:
        return {"Error: No Input": 0.0}
    if not api_client:
        return {"Error: Engine Offline": 0.0}

    try:
        # handle_file() is the correct protocol for Space-to-Space file
        # transfer: it uploads the local file to the remote Space.
        result = api_client.predict(
            video_file=handle_file(video),
            api_name="/predict"
        )
        return result
    except Exception as e:  # was a bare `except:` — narrow and log instead of swallowing silently
        print(f"Prediction failed: {e}")
        return {"Neural Engine Timeout": 0.0}
# 4. THE INTERFACE (PITCH MODE)
with gr.Blocks(theme="monochrome") as demo:
    # Pitch-deck header copy shown above the two-column layout.
    gr.Markdown("""
    # 🧠 OmniSign VLM
    ### **The Future of Universal Motion Recognition**

    OmniSign is a proprietary neural system powered by **Large Vision-Language Models (VLM)**.
    Unlike traditional AI that requires massive specific datasets, our **Neural Transduction**
    technology generalizes across signers, environments, and devices instantly.

    **Key Advantages:**
    - **Zero-Shot Adaptation:** Recognizes signs regardless of background or signer identity.
    - **Instant Lexical Scaling:** Vocabulary can be updated in seconds without retraining.
    - **Temporal Precision:** Deep analysis of high-density motion trajectories.
    """)

    with gr.Row():
        # Left column: video player, sample picker, and the run button.
        with gr.Column():
            gr.Markdown("### 🎦 1. Input Stream")
            video_player = gr.Video(label="Active Video Buffer")

            with gr.Group():
                sample_dropdown = gr.Dropdown(
                    # Leading "" gives an explicit empty choice; keys are
                    # sorted for a stable, scannable list.
                    choices=[""] + sorted(dataset_choices),
                    label="Select Sample from WLASL Archive"
                )

            analyze_btn = gr.Button("🚀 Start Neural Analysis", variant="primary")

        # Right column: top-3 prediction display.
        with gr.Column():
            gr.Markdown("### 📊 2. Lexical Prediction")
            prediction_label = gr.Label(num_top_classes=3, label="VLM Confidence Output")

            gr.Markdown("""
            *This demonstration operates on a high-frequency ASL subset. The engine is
            designed for cross-language universal sign interpretation.*
            """)

    # Choosing a sample downloads it and loads it into the video player.
    sample_dropdown.change(fn=update_video_display, inputs=sample_dropdown, outputs=video_player)

    # Clicking the button analyzes whatever video is currently buffered.
    analyze_btn.click(fn=run_omnisign, inputs=video_player, outputs=prediction_label)

if __name__ == "__main__":
    demo.launch()