Spaces:

st192011
/

ASL-VLM-Protocol

Sleeping

App Files Files Community

st192011 committed on Feb 1

Commit

f552d94

verified ·

1 Parent(s): 4ad9231

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -42

app.py CHANGED Viewed

@@ -6,48 +6,54 @@ from huggingface_hub import hf_hub_download, list_repo_files
 # 1. SECRETS & BACKEND LINK
 HF_TOKEN = os.environ.get("HF_TOKEN")
-PRIVATE_SPACE = "st192011/ASL-VLS-Private"
-# 2. TRADE SECRET: EXPLICIT SUPPORTED VOCABULARY (Hiding the KB structure)
-SUPPORTED_GLOSSES = [
-    "ADAPT", "ADD", "ABOUT", "ACCIDENT", "ACCOUNTANT",
-    "ACROSS", "ACTIVE", "ACTOR", "ADJECTIVE", "ACCEPT",
-    "ABOVE", "ABLE", "ACTION", "ACTIVITY", "ADDRESS",
-    "ACCOMPLISH", "ACCENT"
 ]
-# 3. DATASET DISCOVERY
-# This block is essential for the dropdown in the public demo
-try:
-    all_files = list_repo_files(repo_id="Voxel51/WLASL", repo_type="dataset")
-    data_0_mp4s = [f for f in all_files if f.startswith("data/data_0/") and f.endswith(".mp4")]
-    dataset_options = {}
-    for f_path in data_0_mp4s:
-        vid_id = os.path.basename(f_path).replace(".mp4", "")
-        # Filter for samples that match our supported list (for a clean demo)
-        if any(vid_id in str(s) for s in ["00944", "00963", "00335", "00689", "00842", "01064", "00416", "00947", "00377", "00832"]):
-            gloss_name = [g for g in SUPPORTED_GLOSSES if g.startswith(vid_id[1]) or g.endswith(vid_id[-1])][0] # Simple heuristic
-            dataset_options[f"{gloss_name} (Sample {vid_id})"] = f_path
-except Exception as e:
-    dataset_options = {}
 # 4. INITIALIZE CLIENT
 try:
-    client = Client(PRIVATE_SPACE, hf_token=HF_TOKEN)
-except:
     client = None
 # 5. LOGIC FUNCTIONS
 def update_video_display(selection):
-    """Downloads sample, copies to /tmp, and returns path + Ground Truth for display."""
-    if not selection: return None, None
     try:
-        # Extract Ground Truth from dropdown display name
         gloss_gt = selection.split('(')[0].strip()
-        # Download video file to /tmp for local playback
         hf_path = dataset_options[selection]
         cache_path = hf_hub_download(repo_id="Voxel51/WLASL", filename=hf_path, repo_type="dataset")
         local_path = os.path.join("/tmp", os.path.basename(hf_path))
@@ -55,25 +61,29 @@ def update_video_display(selection):
         return local_path, f"Ground Truth: {gloss_gt}"
     except Exception as e:
-        return None, f"Error: {e}"
 def run_omnisign_vlm(video_path):
-    """Sends video to private VLM engine using positional arguments."""
     if not video_path: return {"Error": "No input detected."}
     if not client: return {"Neural Engine Offline": 0.0}
     try:
-        # FIX: Pass handle_file(video_path) as the FIRST argument (Positional)
-        # Do not use "video_file=" or "video_path=" keys.
         result = client.predict(
-            handle_file(video_path),
-            api_name="/predict_sign"
         )
         return result
     except Exception as e:
-        return {f"Neural Engine Error: {str(e)}": 0.0}
-# 6. UI DESIGN (Final Pitch Presentation)
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown(f"""
     # 🧠 OmniSign VLM: Universal SL Protocol
@@ -86,7 +96,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     - **Lexical Agnostic protocol:** Capable of instant updates to any sign language (Universal SL).
     ---
-    *Notice: This is a structural demonstration. The engine is currently unoptimized and operates on a limited vocabulary subset.*
     """)
     with gr.Row():
@@ -100,7 +110,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 value=""
             )
-            # Ground Truth Display
             gt_output = gr.Textbox(label="Ground Truth", interactive=False, value="Select a sample above to view its Ground Truth.")
             run_btn = gr.Button("🚀 Execute Neural Analysis", variant="primary")
@@ -110,8 +119,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             output_label = gr.Label(num_top_classes=3, label="VLM Confidence Output")
             with gr.Accordion("🔍 View Supported Vocabulary List", open=True):
-                gr.Markdown(f"**This demo subset recognizes {len(SUPPORTED_GLOSSES)} words:**")
-                gr.Markdown(", ".join(SUPPORTED_GLOSSES))
     # Event Mapping
     dataset_drop.change(fn=update_video_display, inputs=dataset_drop, outputs=[video_comp, gt_output])

 # 1. SECRETS & BACKEND LINK
 HF_TOKEN = os.environ.get("HF_TOKEN")
+# Make sure this matches your private space URL exactly
+PRIVATE_SPACE = "st192011/ASL-VLS-Private"
+# 2. DEFINITIVE SUPPORTED VOCABULARY LIST
+SUPPORTED_VIDEOS = [
+    ("00944", "ADAPT"), ("00963", "ADD"), ("01064", "ADJECTIVE"), ("00335", "ABDOMEN"),
+    ("00689", "ACCOUNTANT"), ("00899", "ACTOR"), ("00584", "ACCENT"), ("00632", "ACCIDENT"),
+    ("00586", "ACCENT"), ("00585", "ACCENT"), ("00626", "ACCIDENT"), ("00623", "ACCIDENT"),
+    ("00846", "ACT"), ("00890", "ACTIVITY"), ("00898", "ACTOR"), ("01011", "ADDRESS"),
+    ("00834", "ACROSS"), ("00624", "ACCIDENT"), ("00593", "ACCEPT"), ("00415", "ABOUT"),
+    ("00961", "ADD"), ("00962", "ADD"), ("00594", "ACCEPT"), ("00964", "ADD"),
+    ("00666", "ACCOMPLISH"), ("01065", "ADJECTIVE"), ("00628", "ACCIDENT"), ("00868", "ACTIVE"),
+    ("00836", "ACROSS"), ("00430", "ABOVE"), ("00835", "ACROSS"), ("00946", "ADAPT"),
+    ("00943", "ADAPT"), ("00414", "ABOUT"), ("00376", "ABLE"), ("00832", "ACROSS"),
+    ("00627", "ACCIDENT"), ("00592", "ACCEPT"), ("00625", "ACCIDENT"), ("01012", "ADDRESS"),
+    ("00849", "ACT"), ("00663", "ACCOMPLISH"), ("00853", "ACTION"), ("00967", "ADD"),
+    ("00692", "ACCOUNTANT"), ("00583", "ACCENT"), ("00341", "ACROSS"), ("00378", "ADDRESS"),
+    ("00433", "ADJECTIVE"), ("00384", "ACTOR"), ("00381", "ACTOR"), ("00377", "ACCIDENT"),
+    ("00382", "ACTOR"), ("00378", "ADDRESS")
 ]
+SUPPORTED_GLOSSES_UNIQUE = sorted(list(set([g for _, g in SUPPORTED_VIDEOS])))
+# 3. DATASET DISCOVERY AND MAPPING
+print("Dataset Discovery: Mapping specific video IDs to Glosses...")
+dataset_options = {}
+for vid_id, gloss in SUPPORTED_VIDEOS:
+    # Construct the full HF path (assuming 5-digit ID)
+    hf_path = f"data/data_0/{vid_id.zfill(5)}.mp4"
+    display_name = f"{gloss} (Sample {vid_id})"
+    dataset_options[display_name] = hf_path
 # 4. INITIALIZE CLIENT
+print(f"🔌 Attempting connection to {PRIVATE_SPACE}...")
 try:
+    # Use 'token=' (standard) instead of 'hf_token='
+    client = Client(PRIVATE_SPACE, token=HF_TOKEN)
+    print("✅ Neural Engine Online!")
+except Exception as e:
+    print(f"❌ Connection Failed: {e}")
     client = None
 # 5. LOGIC FUNCTIONS
 def update_video_display(selection):
+    if not selection: return None, None
     try:
         gloss_gt = selection.split('(')[0].strip()
+        # Download the video file to /tmp for local playback
         hf_path = dataset_options[selection]
         cache_path = hf_hub_download(repo_id="Voxel51/WLASL", filename=hf_path, repo_type="dataset")
         local_path = os.path.join("/tmp", os.path.basename(hf_path))
         return local_path, f"Ground Truth: {gloss_gt}"
     except Exception as e:
+        return None, f"Error downloading sample: {e}"
 def run_omnisign_vlm(video_path):
+    """
+    Submits the video to the private backend.
+    CRITICAL: handle_file(video_path) must be passed to client.predict() as the first positional argument, not as a keyword.
+    """
     if not video_path: return {"Error": "No input detected."}
     if not client: return {"Neural Engine Offline": 0.0}
     try:
+        # --- THE FIX IS HERE ---
+        # We pass handle_file(video_path) as the FIRST argument (positional).
+        # We do NOT use 'video_file=' or 'video=' as a keyword.
         result = client.predict(
+            handle_file(video_path),
+            api_name="/predict_sign"
         )
         return result
     except Exception as e:
+        return {f"Neural Analysis Failed: {str(e)}": 0.0}
+# 6. UI DESIGN
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown(f"""
     # 🧠 OmniSign VLM: Universal SL Protocol
     - **Lexical Agnostic protocol:** Capable of instant updates to any sign language (Universal SL).
     ---
+    *Notice: This demonstration uses an unoptimized, limited vocabulary subset for structural proof-of-concept.*
     """)
     with gr.Row():
                 value=""
             )
             gt_output = gr.Textbox(label="Ground Truth", interactive=False, value="Select a sample above to view its Ground Truth.")
             run_btn = gr.Button("🚀 Execute Neural Analysis", variant="primary")
             output_label = gr.Label(num_top_classes=3, label="VLM Confidence Output")
             with gr.Accordion("🔍 View Supported Vocabulary List", open=True):
+                gr.Markdown(f"**This demo subset recognizes {len(SUPPORTED_GLOSSES_UNIQUE)} unique words:**")
+                gr.Markdown(", ".join(SUPPORTED_GLOSSES_UNIQUE))
     # Event Mapping
     dataset_drop.change(fn=update_video_display, inputs=dataset_drop, outputs=[video_comp, gt_output])