st192011 committed on
Commit
f552d94
·
verified ·
1 Parent(s): 4ad9231

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -42
app.py CHANGED
@@ -6,48 +6,54 @@ from huggingface_hub import hf_hub_download, list_repo_files
6
 
7
  # 1. SECRETS & BACKEND LINK
8
  HF_TOKEN = os.environ.get("HF_TOKEN")
9
- PRIVATE_SPACE = "st192011/ASL-VLS-Private"
 
10
 
11
- # 2. TRADE SECRET: EXPLICIT SUPPORTED VOCABULARY (Hiding the KB structure)
12
- SUPPORTED_GLOSSES = [
13
- "ADAPT", "ADD", "ABOUT", "ACCIDENT", "ACCOUNTANT",
14
- "ACROSS", "ACTIVE", "ACTOR", "ADJECTIVE", "ACCEPT",
15
- "ABOVE", "ABLE", "ACTION", "ACTIVITY", "ADDRESS",
16
- "ACCOMPLISH", "ACCENT"
 
 
 
 
 
 
 
 
 
 
17
  ]
 
18
 
19
- # 3. DATASET DISCOVERY
20
- # This block is essential for the dropdown in the public demo
21
- try:
22
- all_files = list_repo_files(repo_id="Voxel51/WLASL", repo_type="dataset")
23
- data_0_mp4s = [f for f in all_files if f.startswith("data/data_0/") and f.endswith(".mp4")]
24
-
25
- dataset_options = {}
26
- for f_path in data_0_mp4s:
27
- vid_id = os.path.basename(f_path).replace(".mp4", "")
28
- # Filter for samples that match our supported list (for a clean demo)
29
- if any(vid_id in str(s) for s in ["00944", "00963", "00335", "00689", "00842", "01064", "00416", "00947", "00377", "00832"]):
30
- gloss_name = [g for g in SUPPORTED_GLOSSES if g.startswith(vid_id[1]) or g.endswith(vid_id[-1])][0] # Simple heuristic
31
- dataset_options[f"{gloss_name} (Sample {vid_id})"] = f_path
32
- except Exception as e:
33
- dataset_options = {}
34
 
35
  # 4. INITIALIZE CLIENT
 
36
  try:
37
- client = Client(PRIVATE_SPACE, hf_token=HF_TOKEN)
38
- except:
 
 
 
39
  client = None
40
 
41
-
42
  # 5. LOGIC FUNCTIONS
43
  def update_video_display(selection):
44
- """Downloads sample, copies to /tmp, and returns path + Ground Truth for display."""
45
- if not selection: return None, None
46
  try:
47
- # Extract Ground Truth from dropdown display name
48
  gloss_gt = selection.split('(')[0].strip()
49
 
50
- # Download video file to /tmp for local playback
51
  hf_path = dataset_options[selection]
52
  cache_path = hf_hub_download(repo_id="Voxel51/WLASL", filename=hf_path, repo_type="dataset")
53
  local_path = os.path.join("/tmp", os.path.basename(hf_path))
@@ -55,25 +61,29 @@ def update_video_display(selection):
55
 
56
  return local_path, f"Ground Truth: {gloss_gt}"
57
  except Exception as e:
58
- return None, f"Error: {e}"
59
 
60
  def run_omnisign_vlm(video_path):
61
- """Sends video to private VLM engine using positional arguments."""
 
 
 
62
  if not video_path: return {"Error": "No input detected."}
63
  if not client: return {"Neural Engine Offline": 0.0}
64
-
65
  try:
66
- # FIX: Pass handle_file(video_path) as the FIRST argument (Positional)
67
- # Do not use "video_file=" or "video_path=" keys.
 
68
  result = client.predict(
69
- handle_file(video_path),
70
- api_name="/predict_sign"
71
  )
72
  return result
73
  except Exception as e:
74
- return {f"Neural Engine Error: {str(e)}": 0.0}
75
 
76
- # 6. UI DESIGN (Final Pitch Presentation)
77
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
78
  gr.Markdown(f"""
79
  # 🧠 OmniSign VLM: Universal SL Protocol
@@ -86,7 +96,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
86
  - **Lexical Agnostic protocol:** Capable of instant updates to any sign language (Universal SL).
87
 
88
  ---
89
- *Notice: This is a structural demonstration. The engine is currently unoptimized and operates on a limited vocabulary subset.*
90
  """)
91
 
92
  with gr.Row():
@@ -100,7 +110,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
100
  value=""
101
  )
102
 
103
- # Ground Truth Display
104
  gt_output = gr.Textbox(label="Ground Truth", interactive=False, value="Select a sample above to view its Ground Truth.")
105
 
106
  run_btn = gr.Button("πŸš€ Execute Neural Analysis", variant="primary")
@@ -110,8 +119,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
110
  output_label = gr.Label(num_top_classes=3, label="VLM Confidence Output")
111
 
112
  with gr.Accordion("πŸ” View Supported Vocabulary List", open=True):
113
- gr.Markdown(f"**This demo subset recognizes {len(SUPPORTED_GLOSSES)} words:**")
114
- gr.Markdown(", ".join(SUPPORTED_GLOSSES))
115
 
116
  # Event Mapping
117
  dataset_drop.change(fn=update_video_display, inputs=dataset_drop, outputs=[video_comp, gt_output])
 
6
 
7
# 1. SECRETS & BACKEND LINK
HF_TOKEN = os.environ.get("HF_TOKEN")
# Make sure this matches your private space URL exactly
PRIVATE_SPACE = "st192011/ASL-VLS-Private"

# 2. DEFINITIVE SUPPORTED VOCABULARY LIST
# (video_id, gloss) pairs; IDs are the 5-digit WLASL sample numbers served
# from the Voxel51/WLASL dataset's data_0 shard by the demo dropdown.
SUPPORTED_VIDEOS = [
    ("00944", "ADAPT"), ("00963", "ADD"), ("01064", "ADJECTIVE"), ("00335", "ABDOMEN"),
    ("00689", "ACCOUNTANT"), ("00899", "ACTOR"), ("00584", "ACCENT"), ("00632", "ACCIDENT"),
    ("00586", "ACCENT"), ("00585", "ACCENT"), ("00626", "ACCIDENT"), ("00623", "ACCIDENT"),
    ("00846", "ACT"), ("00890", "ACTIVITY"), ("00898", "ACTOR"), ("01011", "ADDRESS"),
    ("00834", "ACROSS"), ("00624", "ACCIDENT"), ("00593", "ACCEPT"), ("00415", "ABOUT"),
    ("00961", "ADD"), ("00962", "ADD"), ("00594", "ACCEPT"), ("00964", "ADD"),
    ("00666", "ACCOMPLISH"), ("01065", "ADJECTIVE"), ("00628", "ACCIDENT"), ("00868", "ACTIVE"),
    ("00836", "ACROSS"), ("00430", "ABOVE"), ("00835", "ACROSS"), ("00946", "ADAPT"),
    ("00943", "ADAPT"), ("00414", "ABOUT"), ("00376", "ABLE"), ("00832", "ACROSS"),
    ("00627", "ACCIDENT"), ("00592", "ACCEPT"), ("00625", "ACCIDENT"), ("01012", "ADDRESS"),
    ("00849", "ACT"), ("00663", "ACCOMPLISH"), ("00853", "ACTION"), ("00967", "ADD"),
    ("00692", "ACCOUNTANT"), ("00583", "ACCENT"), ("00341", "ACROSS"), ("00378", "ADDRESS"),
    ("00433", "ADJECTIVE"), ("00384", "ACTOR"), ("00381", "ACTOR"), ("00377", "ACCIDENT"),
    # FIX: removed a second, duplicate ("00378", "ADDRESS") entry that used to
    # follow ("00382", "ACTOR"); the dropdown mapping already dedupes by display
    # name, so the resulting UI options are unchanged.
    ("00382", "ACTOR"),
]
# Alphabetized, de-duplicated gloss list shown in the vocabulary accordion.
# FIX: sorted(list(set([...]))) replaced by the idiomatic set comprehension.
SUPPORTED_GLOSSES_UNIQUE = sorted({gloss for _, gloss in SUPPORTED_VIDEOS})
30
 
31
# 3. DATASET DISCOVERY AND MAPPING
print("Dataset Discovery: Mapping specific video IDs to Glosses...")
# Each dropdown entry "GLOSS (Sample id)" maps to its .mp4 path inside the
# Voxel51/WLASL dataset repo; IDs are zero-padded to 5 digits for the path.
dataset_options = {
    f"{gloss} (Sample {vid_id})": f"data/data_0/{vid_id.zfill(5)}.mp4"
    for vid_id, gloss in SUPPORTED_VIDEOS
}
 
 
 
 
 
 
 
39
 
40
# 4. INITIALIZE CLIENT
print(f"πŸ”Œ Attempting connection to {PRIVATE_SPACE}...")
client = None  # stays None when the handshake fails; the UI degrades gracefully
try:
    # gradio_client expects the keyword 'token=' (not the older 'hf_token=').
    client = Client(PRIVATE_SPACE, token=HF_TOKEN)
    print("βœ… Neural Engine Online!")
except Exception as e:
    print(f"❌ Connection Failed: {e}")
49
 
 
50
  # 5. LOGIC FUNCTIONS
51
  def update_video_display(selection):
  # Resolves a dropdown selection to (local_video_path, "Ground Truth: GLOSS");
  # returns (None, None) when nothing is selected, (None, error_text) on failure.
52
+ if not selection: return None, None
 
53
  try:
 
54
  gloss_gt = selection.split('(')[0].strip()
55
 
56
+ # Download the video file to /tmp for local playback
57
  hf_path = dataset_options[selection]
58
  cache_path = hf_hub_download(repo_id="Voxel51/WLASL", filename=hf_path, repo_type="dataset")
59
  local_path = os.path.join("/tmp", os.path.basename(hf_path))
  # NOTE(review): the diff rendering elides new-file line 60 here — presumably
  # the statement that copies cache_path to local_path. Confirm against the
  # full app.py before relying on local_path existing.
 
61
 
62
  return local_path, f"Ground Truth: {gloss_gt}"
63
  except Exception as e:
64
+ return None, f"Error downloading sample: {e}"
65
 
66
def run_omnisign_vlm(video_path):
    """Forward a local video file to the private VLM Space and return its label dict.

    CRITICAL: the backend endpoint takes the file positionally; it must not be
    passed as a 'video_file=' or 'video=' keyword.
    """
    # Guard clauses: nothing to analyze, or the backend never came online.
    if not video_path:
        return {"Error": "No input detected."}
    if not client:
        return {"Neural Engine Offline": 0.0}
    try:
        # handle_file() wraps the local path; it is the FIRST positional argument.
        return client.predict(handle_file(video_path), api_name="/predict_sign")
    except Exception as e:
        return {f"Neural Analysis Failed: {str(e)}": 0.0}
85
 
86
+ # 6. UI DESIGN
87
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
88
  gr.Markdown(f"""
89
  # 🧠 OmniSign VLM: Universal SL Protocol
 
96
  - **Lexical Agnostic protocol:** Capable of instant updates to any sign language (Universal SL).
97
 
98
  ---
99
+ *Notice: This demonstration uses an unoptimized, limited vocabulary subset for structural proof-of-concept.*
100
  """)
101
 
102
  with gr.Row():
 
110
  value=""
111
  )
112
 
 
113
  gt_output = gr.Textbox(label="Ground Truth", interactive=False, value="Select a sample above to view its Ground Truth.")
114
 
115
  run_btn = gr.Button("πŸš€ Execute Neural Analysis", variant="primary")
 
119
  output_label = gr.Label(num_top_classes=3, label="VLM Confidence Output")
120
 
121
  with gr.Accordion("πŸ” View Supported Vocabulary List", open=True):
122
+ gr.Markdown(f"**This demo subset recognizes {len(SUPPORTED_GLOSSES_UNIQUE)} unique words:**")
123
+ gr.Markdown(", ".join(SUPPORTED_GLOSSES_UNIQUE))
124
 
125
  # Event Mapping
126
  dataset_drop.change(fn=update_video_display, inputs=dataset_drop, outputs=[video_comp, gt_output])