Spaces:

st192011
/

ASL-VLM-Protocol

Sleeping

App Files Files Community

st192011 committed on Feb 1

Commit

43cfccd

verified ·

1 Parent(s): c450334

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -44

app.py CHANGED Viewed

@@ -5,104 +5,122 @@ import gradio as gr
 from gradio_client import Client, handle_file
 from huggingface_hub import hf_hub_download, list_repo_files
-# 1. SECRETS
 HF_TOKEN = os.environ.get("HF_TOKEN")
 PRIVATE_SPACE = "st192011/ASL-VLS-Private"
-# 2. LOAD SUPPORTED GLOSSARY (UI only)
 KB_FILE = "asl_rag_knowledge_base.json"
 supported_glosses = []
 if os.path.exists(KB_FILE):
-    try:
-        with open(KB_FILE, 'r') as f:
-            kb_data = json.load(f)
-        supported_glosses = sorted(list(set([item['gloss'].upper() for item in kb_data])))
-    except:
-        supported_glosses = ["Error loading glossary"]
-# 3. DATASET DISCOVERY
-print("Syncing with WLASL Archive...")
 try:
     all_files = list_repo_files(repo_id="Voxel51/WLASL", repo_type="dataset")
     data_0_mp4s = [f for f in all_files if f.startswith("data/data_0/") and f.endswith(".mp4")]
     dataset_choices = {os.path.basename(f): f for f in data_0_mp4s}
-except:
     dataset_choices = {}
-# 4. LOGIC FUNCTIONS
 def update_video_display(selection):
     if not selection: return None
     try:
         hf_path = dataset_choices[selection]
         cache_path = hf_hub_download(repo_id="Voxel51/WLASL", filename=hf_path, repo_type="dataset")
         local_path = os.path.join("/tmp", selection)
         shutil.copy(cache_path, local_path)
         return local_path
     except Exception as e:
-        print(f"File Error: {e}")
         return None
-def run_omnisign_analysis(video):
-    if not video:
-        return {"Error": "No video input detected."}
-    # LAZY LOADING: Initialize client here to avoid startup crashes
-    try:
-        api_client = Client(PRIVATE_SPACE, hf_token=HF_TOKEN)
-    except Exception as e:
-        return {"Connection Error": f"Could not reach private engine. Please ensure it is running. ({str(e)})"}
     try:
-        # Pass the video file to the private space
-        result = api_client.predict(
-            video_file=handle_file(video),
-            api_name="/predict"
         )
         return result
     except Exception as e:
-        return {"Processing Error": f"The neural engine timed out or failed: {str(e)}"}
-# 5. UI DESIGN (PITCH FORMAT)
-with gr.Blocks(theme="glass") as demo:
     gr.Markdown(f"""
     # 🧠 OmniSign VLM
     ### **Universal Neural Sign Language Protocol**
-    OmniSign is a proprietary architecture for sign language interpretation powered by **Large Vision-Language Models (VLM)**.
-    Our **Temporal Neural Transduction** protocol enables zero-shot generalization across signers and environments.
-    **Core Advantages:**
-    *   **Universal Generalization:** High performance regardless of lighting, background, or camera.
-    *   **Instant Lexical Scaling:** Vocabulary updates in seconds via semantic indexing.
-    *   **Person-Agnostic:** Analyzes movement logic rather than memorizing individual signers.
     ---
-    *Notice: This is a structural demonstration. The engine is currently non-optimized and operates on a limited vocabulary.*
     """)
     with gr.Row():
         with gr.Column():
-            gr.Markdown("### 🎦 1. Input Interface")
-            video_display = gr.Video(label="Neural Input Buffer")
             dataset_drop = gr.Dropdown(
                 choices=[""] + sorted(list(dataset_choices.keys())),
-                label="Explore WLASL data_0 Samples (Verified Support)",
                 value=""
             )
             run_btn = gr.Button("🚀 Execute Neural Analysis", variant="primary")
         with gr.Column():
-            gr.Markdown("### 📊 2. VLM Perception Output")
             output_label = gr.Label(num_top_classes=3, label="Neural Confidence Score")
-            with gr.Accordion("🔍 Supported Glossary", open=True):
                 gr.Markdown(", ".join(supported_glosses))
-    dataset_drop.change(fn=update_video_display, inputs=dataset_drop, outputs=video_display)
-    run_btn.click(fn=run_omnisign_analysis, inputs=video_display, outputs=output_label)
 if __name__ == "__main__":
-    # Set ssr_mode=False to improve stability on Hugging Face
     demo.launch(ssr_mode=False)

 from gradio_client import Client, handle_file
 from huggingface_hub import hf_hub_download, list_repo_files
+# 1. AUTHENTICATION
+# Ensure HF_TOKEN is in your Space Secrets
 HF_TOKEN = os.environ.get("HF_TOKEN")
 PRIVATE_SPACE = "st192011/ASL-VLS-Private"
+# Create the client once at startup; on failure it stays None and run_omnisign_vlm retries the connection
+try:
+    client = Client(PRIVATE_SPACE, hf_token=HF_TOKEN)
+except Exception as e:
+    print(f"Initial connection failed: {e}")
+    client = None
+# 2. UI GLOSSARY (Load from the uploaded JSON)
 KB_FILE = "asl_rag_knowledge_base.json"
 supported_glosses = []
 if os.path.exists(KB_FILE):
+    with open(KB_FILE, 'r') as f:
+        kb_data = json.load(f)
+    supported_glosses = sorted(list(set([item['gloss'].upper() for item in kb_data])))
+# 3. DATASET DISCOVERY (WLASL data_0)
+print("Discovery: Syncing with WLASL Dataset...")
 try:
     all_files = list_repo_files(repo_id="Voxel51/WLASL", repo_type="dataset")
     data_0_mp4s = [f for f in all_files if f.startswith("data/data_0/") and f.endswith(".mp4")]
     dataset_choices = {os.path.basename(f): f for f in data_0_mp4s}
+except Exception as e:
+    print(f"Repo listing failed: {e}")
     dataset_choices = {}
+# 4. LOGIC
 def update_video_display(selection):
+    """Downloads sample and copies it to local /tmp for playback access"""
     if not selection: return None
     try:
         hf_path = dataset_choices[selection]
+        # Download to HF cache
         cache_path = hf_hub_download(repo_id="Voxel51/WLASL", filename=hf_path, repo_type="dataset")
+        # Copy to /tmp so Gradio can play it
         local_path = os.path.join("/tmp", selection)
         shutil.copy(cache_path, local_path)
         return local_path
     except Exception as e:
+        print(f"Playback error: {e}")
         return None
+def run_omnisign_vlm(video_path):
+    """Sends video to private VLM engine using handle_file protocol"""
+    if not video_path:
+        return {"Error": "No input detected."}
+    global client
+    if client is None:
+        try:
+            client = Client(PRIVATE_SPACE, hf_token=HF_TOKEN)
+        except:
+            return {"Neural Engine Offline": 0.0}
     try:
+        # The key: Use handle_file to wrap the path for the API
+        # We call the explicit api_name we set in the private space
+        result = client.predict(
+            video_file=handle_file(video_path),
+            api_name="/predict_sign"
         )
         return result
     except Exception as e:
+        return {f"Neural Analysis Failed: {str(e)}": 0.0}
+# 5. UI DESIGN (Pitch Presentation)
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown(f"""
     # 🧠 OmniSign VLM
     ### **Universal Neural Sign Language Protocol**
+    OmniSign is an advanced structural demonstration of **Large Vision-Language Model (VLM)** capabilities applied to human kinetic semantics.
+    Our protocol uses **Temporal Neural Transduction** to interpret sign language without the limitations of traditional, person-specific training.
+    **Technology Highlights:**
+    - **Zero-Shot Environmental Adaption:** Works across any lighting or background.
+    - **Lexical Agnostic protocol:** Capable of instant updates to any sign language (ASL, BSL, etc.) without retraining.
+    - **Human-Independent Reasoning:** Focuses on movement logic rather than signer identity.
     ---
+    *Notice: This demonstration uses an unoptimized, limited vocabulary subset for structural proof-of-concept.*
     """)
     with gr.Row():
         with gr.Column():
+            gr.Markdown("### 🎦 1. Select Input")
+            video_comp = gr.Video(label="Input Buffer", autoplay=True)
             dataset_drop = gr.Dropdown(
                 choices=[""] + sorted(list(dataset_choices.keys())),
+                label="Explore Dataset Samples (Verified Support)",
                 value=""
             )
+            gr.Markdown("""*Choose a sample to watch it in the buffer. You can then click analyze,
+            or record your own version of that word to test the VLM's robustness.*""")
             run_btn = gr.Button("🚀 Execute Neural Analysis", variant="primary")
         with gr.Column():
+            gr.Markdown("### 📊 2. VLM Perception Result")
             output_label = gr.Label(num_top_classes=3, label="Neural Confidence Score")
+            with gr.Accordion("🔍 View Supported Vocabulary", open=True):
                 gr.Markdown(", ".join(supported_glosses))
+    # Link Dropdown to Video Player
+    dataset_drop.change(fn=update_video_display, inputs=dataset_drop, outputs=video_comp)
+    # Link Analyze Button to Private API
+    run_btn.click(fn=run_omnisign_vlm, inputs=video_comp, outputs=output_label)
 if __name__ == "__main__":
+    # Disabling ssr_mode resolves the "Invalid file descriptor" issue in asyncio
     demo.launch(ssr_mode=False)