st192011 committed on
Commit
6bf42f7
Β·
verified Β·
1 Parent(s): ed5073f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -44
app.py CHANGED
@@ -1,91 +1,109 @@
1
  import os
 
2
  import json
3
  import gradio as gr
4
  from gradio_client import Client, handle_file
5
  from huggingface_hub import hf_hub_download, list_repo_files
6
 
7
- # 1. CONFIG & PRIVATE LINK
8
  HF_TOKEN = os.environ.get("HF_TOKEN")
9
  PRIVATE_SPACE = "st192011/ASL-VLS-Private"
10
 
11
  try:
12
  api_client = Client(PRIVATE_SPACE, hf_token=HF_TOKEN)
13
- except:
 
14
  api_client = None
15
 
16
- # 2. DATASET PREP
17
- print("Fetching dataset library...")
 
 
 
 
 
 
 
 
 
18
  all_files = list_repo_files(repo_id="Voxel51/WLASL", repo_type="dataset")
 
19
  data_0_mp4s = [f for f in all_files if f.startswith("data/data_0/") and f.endswith(".mp4")]
20
-
21
- # Create a clean display name map (using ID as key for simplicity in demo)
22
  dataset_choices = {os.path.basename(f): f for f in data_0_mp4s}
23
 
24
- # 3. UI LOGIC
25
  def update_video_display(selection):
26
- """Downloads the selected file and shows it in the Video player"""
27
  if not selection: return None
 
28
  hf_path = dataset_choices[selection]
29
- local_path = hf_hub_download(repo_id="Voxel51/WLASL", filename=hf_path, repo_type="dataset")
 
 
 
 
 
 
30
  return local_path
31
 
32
- def run_omnisign(video):
33
- if not video: return {"Error: No Input": 0.0}
34
- if not api_client: return {"Error: Engine Offline": 0.0}
 
35
 
36
  try:
37
- # Correct Protocol for Space-to-Space file transfer
38
  result = api_client.predict(
39
  video_file=handle_file(video),
40
  api_name="/predict"
41
  )
42
  return result
43
- except:
44
- return {"Neural Engine Timeout": 0.0}
45
 
46
- # 4. THE INTERFACE (PITCH MODE)
47
- with gr.Blocks(theme="monochrome") as demo:
48
- gr.Markdown("""
49
  # 🧠 OmniSign VLM
50
- ### **The Future of Universal Motion Recognition**
 
 
 
51
 
52
- OmniSign is a proprietary neural system powered by **Large Vision-Language Models (VLM)**.
53
- Unlike traditional AI that requires massive specific datasets, our **Neural Transduction**
54
- technology generalizes across signers, environments, and devices instantly.
 
55
 
56
- **Key Advantages:**
57
- - **Zero-Shot Adaptation:** Recognizes signs regardless of background or signer identity.
58
- - **Instant Lexical Scaling:** Vocabulary can be updated in seconds without retraining.
59
- - **Temporal Precision:** Deep analysis of high-density motion trajectories.
60
  """)
61
 
62
  with gr.Row():
63
  with gr.Column():
64
- gr.Markdown("### 🎦 1. Input Stream")
65
- video_display = gr.Video(label="Active Video Buffer")
 
66
 
67
- with gr.Group():
68
- dataset_drop = gr.Dropdown(
69
- choices=[""] + sorted(list(dataset_choices.keys())),
70
- label="Select Sample from WLASL Archive"
71
- )
72
 
73
- run_btn = gr.Button("πŸš€ Start Neural Analysis", variant="primary")
 
74
 
75
  with gr.Column():
76
- gr.Markdown("### πŸ“Š 2. Lexical Prediction")
77
- output_label = gr.Label(num_top_classes=3, label="VLM Confidence Output")
78
 
79
- gr.Markdown("""
80
- *This demonstration operates on a high-frequency ASL subset. The engine is
81
- designed for cross-language universal sign interpretation.*
82
- """)
83
 
84
- # Event: When dropdown changes, update the video player
85
  dataset_drop.change(fn=update_video_display, inputs=dataset_drop, outputs=video_display)
86
-
87
- # Event: When button clicked, analyze the video currently in the player
88
- run_btn.click(fn=run_omnisign, inputs=video_display, outputs=output_label)
89
 
90
  if __name__ == "__main__":
91
  demo.launch()
 
import json
import os
import shutil
import tempfile

import gradio as gr
from gradio_client import Client, handle_file
from huggingface_hub import hf_hub_download, list_repo_files
7
 
8
# 1. SECRETS & BACKEND LINK
HF_TOKEN = os.getenv("HF_TOKEN")
PRIVATE_SPACE = "st192011/ASL-VLS-Private"

# Attempt a single connection to the private inference Space at startup.
# On failure, api_client is left as None so the UI can still render and
# report "backend offline" instead of crashing on import.
try:
    api_client = Client(PRIVATE_SPACE, hf_token=HF_TOKEN)
except Exception as e:
    print(f"Connection Error: {e}")
    api_client = None
17
 
18
# 2. LOAD SUPPORTED GLOSSARY
# The JSON knowledge base is read only to surface the list of supported
# words in the UI; recognition itself happens in the private Space.
KB_FILE = "asl_rag_knowledge_base.json"
supported_glosses = []
if os.path.exists(KB_FILE):
    # Explicit encoding so the glossary loads identically on all platforms.
    with open(KB_FILE, "r", encoding="utf-8") as f:
        kb_data = json.load(f)
    # Set comprehension de-duplicates glosses; sorted() already returns a
    # list (replaces the redundant sorted(list(set([...]))) chain).
    supported_glosses = sorted({item["gloss"].upper() for item in kb_data})
26
+
27
# 3. DATASET DISCOVERY
print("Syncing with WLASL Archive...")
all_files = list_repo_files(repo_id="Voxel51/WLASL", repo_type="dataset")

# Keep only the .mp4 clips under data/data_0/, then map each basename to
# its full repo path so the dropdown can show short, readable names.
data_0_mp4s = [
    f
    for f in all_files
    if f.endswith(".mp4") and f.startswith("data/data_0/")
]
dataset_choices = {os.path.basename(f): f for f in data_0_mp4s}
33
 
34
# 4. LOGIC FUNCTIONS
def update_video_display(selection):
    """Download the selected WLASL sample and stage it where Gradio can serve it.

    Args:
        selection: Basename of a clip chosen in the dropdown, or a falsy
            value when the dropdown is cleared.

    Returns:
        Path to a local copy of the video, or None when nothing is selected.
    """
    if not selection:
        return None

    hf_path = dataset_choices[selection]
    # Download into the local HF cache (no-op if already cached).
    cache_path = hf_hub_download(repo_id="Voxel51/WLASL", filename=hf_path, repo_type="dataset")

    # Copy out of the cache so Gradio's file server can reach the file
    # (serving straight from the HF cache triggers InvalidPathError).
    # tempfile.gettempdir() replaces the hard-coded "/tmp" so this also
    # works on platforms without /tmp (e.g. Windows).
    local_path = os.path.join(tempfile.gettempdir(), selection)
    shutil.copy(cache_path, local_path)

    return local_path
48
 
49
def run_omnisign_analysis(video):
    """Forward the active video (sample or user-recorded) to the private VLM engine.

    Returns a {label: confidence} dict suitable for gr.Label, including
    error payloads when no input is given or the backend is unreachable.
    """
    # Guard clauses: bail out early with label-shaped error payloads so
    # the gr.Label component can render the message directly.
    if not video:
        return {"Error: No Video Detected": 0.0}
    if not api_client:
        return {"Error: Neural Backend Offline": 0.0}

    try:
        # handle_file() safely streams the local video to the remote Space.
        response = api_client.predict(
            video_file=handle_file(video),
            api_name="/predict",
        )
    except Exception as e:
        return {f"Neural Engine Error: {str(e)}": 0.0}
    return response
63
 
64
# 5. UI DESIGN (PITCH FORMAT)
with gr.Blocks(theme="glass") as demo:
    # Pitch-style header copy shown above the demo controls.
    gr.Markdown(f"""
    # 🧠 OmniSign VLM
    ### **Universal Neural Sign Language Protocol**

    This demonstration introduces a revolutionary **VLM-based architecture** for sign language interpretation.
    Unlike traditional models that are prone to overfitting, the OmniSign protocol leverages **Temporal Neural Transduction** to generalize across all environments and signers instantly.

    **Proprietary Core Advantages:**
    * **Universal Generalization:** Robust performance in any environment, lighting, or camera angle.
    * **Instant Lexical Scaling:** The protocol allows for adding any new sign language word instantly without retraining.
    * **Person-Agnostic Reasoning:** The system analyzes movement logic rather than memorizing specific signers.

    ---
    *Notice: This is a structural demonstration. The current engine is non-optimized and operates on a limited vocabulary subset.*
    """)

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🎦 1. Input Interface")
            # This player handles BOTH uploads and the samples from the dropdown
            video_display = gr.Video(label="Neural Input Buffer")

            # dataset_choices is built at module load from the WLASL repo
            # listing; the empty first choice lets the user clear the player.
            dataset_drop = gr.Dropdown(
                choices=[""] + sorted(list(dataset_choices.keys())),
                label="Explore WLASL data_0 Samples"
            )

            gr.Markdown("*Tip: Select a sample above to watch it, then sign it yourself or analyze the sample.*")
            run_btn = gr.Button("🚀 Execute Neural Analysis", variant="primary")

        with gr.Column():
            gr.Markdown("### 📊 2. VLM Perception Output")
            output_label = gr.Label(num_top_classes=3, label="Neural Confidence Score")

            # supported_glosses comes from the optional local knowledge-base
            # JSON; the accordion is empty-but-valid when that file is absent.
            with gr.Accordion("🔍 Supported Glossary", open=True):
                gr.Markdown(f"**The system currently recognizes {len(supported_glosses)} signs:**")
                gr.Markdown(", ".join(supported_glosses))

    # Event Mapping:
    # - dropdown change loads the chosen sample into the video player
    # - button click sends whatever is currently in the player to the backend
    dataset_drop.change(fn=update_video_display, inputs=dataset_drop, outputs=video_display)
    run_btn.click(fn=run_omnisign_analysis, inputs=video_display, outputs=output_label)
 
 
107
 
108
# Launch the Gradio server when executed as a script (HF Spaces runs this file directly).
if __name__ == "__main__":
    demo.launch()