Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import json
import gradio as gr
from gradio_client import Client, handle_file
from huggingface_hub import hf_hub_download, list_repo_files

# 1. CONFIG & PRIVATE LINK
# HF_TOKEN must be provided as a Space secret; it authorizes calls to the
# private inference Space below.
HF_TOKEN = os.environ.get("HF_TOKEN")
PRIVATE_SPACE = "st192011/ASL-VLS-Private"

# Connect to the private backend Space at startup. If the connection fails
# (bad/missing token, Space asleep, network error), keep the UI alive and
# let run_omnisign() report "Engine Offline" instead of crashing on import.
try:
    api_client = Client(PRIVATE_SPACE, hf_token=HF_TOKEN)
except Exception as e:  # was a bare `except:` — don't mask SystemExit/KeyboardInterrupt
    print(f"Warning: could not connect to {PRIVATE_SPACE}: {e}")
    api_client = None
# 2. DATASET PREP
print("Fetching dataset library...")

# Pull the dataset's full file index, then keep only the MP4 clips that
# live under data/data_0/.
all_files = list_repo_files(repo_id="Voxel51/WLASL", repo_type="dataset")
data_0_mp4s = [
    repo_path
    for repo_path in all_files
    if repo_path.startswith("data/data_0/") and repo_path.endswith(".mp4")
]

# Map the bare filename (readable in a dropdown) to the full repo path
# (needed later for hf_hub_download).
dataset_choices = {os.path.basename(repo_path): repo_path for repo_path in data_0_mp4s}
# 3. UI LOGIC
def update_video_display(selection):
    """Resolve a dropdown selection to a playable local video path.

    Args:
        selection: Display name chosen in the dropdown (a key of
            ``dataset_choices``), or ""/None when nothing is selected.

    Returns:
        Local filesystem path of the downloaded clip, or None when the
        selection is empty or unknown.
    """
    if not selection:
        return None
    # .get() instead of [] so a stale or unknown choice clears the player
    # rather than raising KeyError inside the Gradio event handler.
    hf_path = dataset_choices.get(selection)
    if hf_path is None:
        return None
    # hf_hub_download caches locally, so repeat selections don't re-download.
    local_path = hf_hub_download(repo_id="Voxel51/WLASL", filename=hf_path, repo_type="dataset")
    return local_path
def run_omnisign(video):
    """Send the buffered video to the private inference Space.

    Args:
        video: Local path of the video currently in the player, or
            None/"" when the buffer is empty.

    Returns:
        A label-to-confidence dict suitable for gr.Label: either the
        backend's prediction or a single error entry.
    """
    if not video:
        return {"Error: No Input": 0.0}
    if not api_client:
        return {"Error: Engine Offline": 0.0}

    try:
        # handle_file() is the correct protocol for Space-to-Space file
        # transfer: it uploads the local file to the remote Space.
        result = api_client.predict(
            video_file=handle_file(video),
            api_name="/predict"
        )
        return result
    except Exception as e:  # was a bare `except:` — narrow and log instead of swallowing silently
        print(f"Prediction failed: {e}")
        return {"Neural Engine Timeout": 0.0}
# 4. THE INTERFACE (PITCH MODE)
with gr.Blocks(theme="monochrome") as demo:
    # Pitch-deck header copy shown above the two-column layout.
    gr.Markdown("""
    # 🧠 OmniSign VLM
    ### **The Future of Universal Motion Recognition**

    OmniSign is a proprietary neural system powered by **Large Vision-Language Models (VLM)**.
    Unlike traditional AI that requires massive specific datasets, our **Neural Transduction**
    technology generalizes across signers, environments, and devices instantly.

    **Key Advantages:**
    - **Zero-Shot Adaptation:** Recognizes signs regardless of background or signer identity.
    - **Instant Lexical Scaling:** Vocabulary can be updated in seconds without retraining.
    - **Temporal Precision:** Deep analysis of high-density motion trajectories.
    """)

    with gr.Row():
        # Left column: video player, sample picker, and the run button.
        with gr.Column():
            gr.Markdown("### 🎦 1. Input Stream")
            video_player = gr.Video(label="Active Video Buffer")

            with gr.Group():
                sample_dropdown = gr.Dropdown(
                    # Leading "" gives an explicit empty choice; keys are
                    # sorted for a stable, scannable list.
                    choices=[""] + sorted(dataset_choices),
                    label="Select Sample from WLASL Archive"
                )

            analyze_btn = gr.Button("🚀 Start Neural Analysis", variant="primary")

        # Right column: top-3 prediction display.
        with gr.Column():
            gr.Markdown("### 📊 2. Lexical Prediction")
            prediction_label = gr.Label(num_top_classes=3, label="VLM Confidence Output")

            gr.Markdown("""
            *This demonstration operates on a high-frequency ASL subset. The engine is
            designed for cross-language universal sign interpretation.*
            """)

    # Choosing a sample downloads it and loads it into the video player.
    sample_dropdown.change(fn=update_video_display, inputs=sample_dropdown, outputs=video_player)

    # Clicking the button analyzes whatever video is currently buffered.
    analyze_btn.click(fn=run_omnisign, inputs=video_player, outputs=prediction_label)

if __name__ == "__main__":
    demo.launch()