st192011 committed on
Commit
43cfccd
·
verified ·
1 Parent(s): c450334

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -44
app.py CHANGED
@@ -5,104 +5,122 @@ import gradio as gr
5
  from gradio_client import Client, handle_file
6
  from huggingface_hub import hf_hub_download, list_repo_files
7
 
8
# 1. SECRETS
# Access token for the private Space; None when HF_TOKEN is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
PRIVATE_SPACE = "st192011/ASL-VLS-Private"

# 2. LOAD SUPPORTED GLOSSARY (UI only)
KB_FILE = "asl_rag_knowledge_base.json"
supported_glosses = []
if os.path.exists(KB_FILE):
    try:
        with open(KB_FILE, "r", encoding="utf-8") as f:
            kb_data = json.load(f)
        # De-duplicate and upper-case the glosses before sorting for display.
        supported_glosses = sorted({item["gloss"].upper() for item in kb_data})
    except Exception as e:
        # The glossary is display-only, so a bad file degrades the UI
        # instead of stopping the app; the reason is logged for debugging.
        print(f"Glossary load failed: {e}")
        supported_glosses = ["Error loading glossary"]

# 3. DATASET DISCOVERY
print("Syncing with WLASL Archive...")
try:
    all_files = list_repo_files(repo_id="Voxel51/WLASL", repo_type="dataset")
    data_0_mp4s = [f for f in all_files if f.startswith("data/data_0/") and f.endswith(".mp4")]
    # Map the bare file name (shown in the dropdown) to its path in the repo.
    dataset_choices = {os.path.basename(f): f for f in data_0_mp4s}
except Exception as e:
    # Without a listing the dropdown is simply empty; the app still starts.
    print(f"Repo listing failed: {e}")
    dataset_choices = {}
31
 
32
# 4. LOGIC FUNCTIONS
def update_video_display(selection):
    """Download the selected dataset sample and return a playable local path.

    Args:
        selection: File name chosen in the dropdown; it must be a key of
            ``dataset_choices``. Falsy values (``""`` or ``None``) mean
            "nothing selected".

    Returns:
        Path of a copy of the video inside the system temporary directory,
        or ``None`` when nothing is selected or the download/copy fails.
    """
    import tempfile  # local import so the block does not depend on the file header

    if not selection:
        return None
    try:
        hf_path = dataset_choices[selection]
        cache_path = hf_hub_download(repo_id="Voxel51/WLASL", filename=hf_path, repo_type="dataset")
        # Copy out of the hub cache into the temp directory.
        # NOTE(review): presumably needed because Gradio only serves files
        # from a few locations, the temp dir among them - confirm.
        local_path = os.path.join(tempfile.gettempdir(), selection)
        shutil.copy(cache_path, local_path)
        return local_path
    except Exception as e:
        # UI callback boundary: log the problem and leave the player empty.
        print(f"File Error: {e}")
        return None
44
 
45
def run_omnisign_analysis(video):
    """Send a video to the private Space and return its prediction.

    The result is wired to a ``gr.Label``, which takes a mapping of label to
    numeric confidence. Error paths therefore put the message in the key
    with a score of 0.0 rather than returning a string as the value.

    Args:
        video: Path of the video to analyse; falsy when no video is loaded.

    Returns:
        Whatever the remote ``/predict`` endpoint returns on success, or a
        one-entry ``{message: 0.0}`` dict describing the failure.
    """
    if not video:
        return {"Error: No video input detected.": 0.0}

    # LAZY LOADING: Initialize client here to avoid startup crashes
    try:
        api_client = Client(PRIVATE_SPACE, hf_token=HF_TOKEN)
    except Exception as e:
        return {
            f"Connection Error: Could not reach private engine. "
            f"Please ensure it is running. ({e})": 0.0
        }

    try:
        # Pass the video file to the private space
        return api_client.predict(
            video_file=handle_file(video),
            api_name="/predict",
        )
    except Exception as e:
        return {f"Processing Error: The neural engine timed out or failed: {e}": 0.0}
64
 
65
# 5. UI DESIGN (PITCH FORMAT)
# NOTE(review): the nesting below was rebuilt from a listing whose indentation
# was lost; confirm that the glossary accordion belongs in the second column.
with gr.Blocks(theme="glass") as demo:
    gr.Markdown("""
    # 🧠 OmniSign VLM
    ### **Universal Neural Sign Language Protocol**

    OmniSign is a proprietary architecture for sign language interpretation powered by **Large Vision-Language Models (VLM)**.
    Our **Temporal Neural Transduction** protocol enables zero-shot generalization across signers and environments.

    **Core Advantages:**
    * **Universal Generalization:** High performance regardless of lighting, background, or camera.
    * **Instant Lexical Scaling:** Vocabulary updates in seconds via semantic indexing.
    * **Person-Agnostic:** Analyzes movement logic rather than memorizing individual signers.

    ---
    *Notice: This is a structural demonstration. The engine is currently non-optimized and operates on a limited vocabulary.*
    """)

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🎦 1. Input Interface")
            video_display = gr.Video(label="Neural Input Buffer")

            # The leading "" entry is the "nothing selected" choice.
            dataset_drop = gr.Dropdown(
                choices=[""] + sorted(dataset_choices),
                label="Explore WLASL data_0 Samples (Verified Support)",
                value="",
            )

            run_btn = gr.Button("🚀 Execute Neural Analysis", variant="primary")

        with gr.Column():
            gr.Markdown("### 📊 2. VLM Perception Output")
            output_label = gr.Label(num_top_classes=3, label="Neural Confidence Score")

            with gr.Accordion("🔍 Supported Glossary", open=True):
                gr.Markdown(", ".join(supported_glosses))

    # Choosing a sample loads it into the player; the button sends whatever
    # is in the player to the analysis function.
    dataset_drop.change(fn=update_video_display, inputs=dataset_drop, outputs=video_display)
    run_btn.click(fn=run_omnisign_analysis, inputs=video_display, outputs=output_label)

if __name__ == "__main__":
    # Set ssr_mode=False to improve stability on Hugging Face
    demo.launch(ssr_mode=False)
 
5
  from gradio_client import Client, handle_file
6
  from huggingface_hub import hf_hub_download, list_repo_files
7
 
8
# 1. AUTHENTICATION
# Ensure HF_TOKEN is in your Space Secrets
HF_TOKEN = os.environ.get("HF_TOKEN")
PRIVATE_SPACE = "st192011/ASL-VLS-Private"

# Connect to the private engine once at startup. A failure here is tolerated:
# `client` is left as None so the app can still come up.
try:
    client = Client(PRIVATE_SPACE, hf_token=HF_TOKEN)
except Exception as e:
    print(f"Initial connection failed: {e}")
    client = None

# 2. UI GLOSSARY (Load from the uploaded JSON)
KB_FILE = "asl_rag_knowledge_base.json"
supported_glosses = []
if os.path.exists(KB_FILE):
    # The glossary is display-only, so malformed JSON or an entry without a
    # "gloss" key must not stop the app from starting; the list stays empty.
    try:
        with open(KB_FILE, "r", encoding="utf-8") as f:
            kb_data = json.load(f)
        # De-duplicate and upper-case the glosses before sorting for display.
        supported_glosses = sorted({item["gloss"].upper() for item in kb_data})
    except Exception as e:
        print(f"Glossary load failed: {e}")

# 3. DATASET DISCOVERY (WLASL data_0)
print("Discovery: Syncing with WLASL Dataset...")
try:
    all_files = list_repo_files(repo_id="Voxel51/WLASL", repo_type="dataset")
    data_0_mp4s = [f for f in all_files if f.startswith("data/data_0/") and f.endswith(".mp4")]
    # Map the bare file name (shown in the dropdown) to its path in the repo.
    dataset_choices = {os.path.basename(f): f for f in data_0_mp4s}
except Exception as e:
    print(f"Repo listing failed: {e}")
    dataset_choices = {}
37
 
38
# 4. LOGIC
def update_video_display(selection):
    """Download the selected sample and copy it to the temp directory for playback.

    Args:
        selection: File name chosen in the dropdown; it must be a key of
            ``dataset_choices``. Falsy values (``""`` or ``None``) mean
            "nothing selected".

    Returns:
        Path of the copied video inside the system temporary directory, or
        ``None`` when nothing is selected or the download/copy fails.
    """
    import tempfile  # local import so the block does not depend on the file header

    if not selection:
        return None
    try:
        hf_path = dataset_choices[selection]
        # Download to HF cache
        cache_path = hf_hub_download(repo_id="Voxel51/WLASL", filename=hf_path, repo_type="dataset")
        # Copy into the system temp directory so Gradio can play it; asking
        # tempfile for the location avoids assuming it is literally "/tmp".
        local_path = os.path.join(tempfile.gettempdir(), selection)
        shutil.copy(cache_path, local_path)
        return local_path
    except Exception as e:
        # UI callback boundary: log the problem and leave the player empty.
        print(f"Playback error: {e}")
        return None
53
 
54
def run_omnisign_vlm(video_path):
    """Send a video to the private VLM engine and return its prediction.

    Every error path returns a one-entry ``{message: 0.0}`` dict so the
    value has the same label-to-score shape in all cases.

    Args:
        video_path: Path of the video to analyse; falsy when none is loaded.

    Returns:
        Whatever the remote ``/predict_sign`` endpoint returns on success,
        or a ``{message: 0.0}`` dict describing the failure.
    """
    global client

    if not video_path:
        return {"Error: No input detected.": 0.0}

    # The startup connection may have failed; try once more on demand.
    if client is None:
        try:
            client = Client(PRIVATE_SPACE, hf_token=HF_TOKEN)
        except Exception as e:
            # Keep the reason in the logs; the UI only needs to know it is down.
            print(f"Reconnection failed: {e}")
            return {"Neural Engine Offline": 0.0}

    try:
        # The key: Use handle_file to wrap the path for the API
        # We call the explicit api_name we set in the private space
        return client.predict(
            video_file=handle_file(video_path),
            api_name="/predict_sign",
        )
    except Exception as e:
        return {f"Neural Analysis Failed: {str(e)}": 0.0}
76
 
77
# 5. UI DESIGN (Pitch Presentation)
# NOTE(review): the nesting below was rebuilt from a listing whose indentation
# was lost; confirm that the vocabulary accordion belongs in the second column.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🧠 OmniSign VLM
    ### **Universal Neural Sign Language Protocol**

    OmniSign is an advanced structural demonstration of **Large Vision-Language Model (VLM)** capabilities applied to human kinetic semantics.
    Our protocol uses **Temporal Neural Transduction** to interpret sign language without the limitations of traditional, person-specific training.

    **Technology Highlights:**
    - **Zero-Shot Environmental Adaption:** Works across any lighting or background.
    - **Lexical Agnostic protocol:** Capable of instant updates to any sign language (ASL, BSL, etc.) without retraining.
    - **Human-Independent Reasoning:** Focuses on movement logic rather than signer identity.

    ---
    *Notice: This demonstration uses an unoptimized, limited vocabulary subset for structural proof-of-concept.*
    """)

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🎦 1. Select Input")
            video_comp = gr.Video(label="Input Buffer", autoplay=True)

            # The leading "" entry is the "nothing selected" choice.
            dataset_drop = gr.Dropdown(
                choices=[""] + sorted(dataset_choices),
                label="Explore Dataset Samples (Verified Support)",
                value="",
            )

            gr.Markdown("""*Choose a sample to watch it in the buffer. You can then click analyze,
            or record your own version of that word to test the VLM's robustness.*""")

            run_btn = gr.Button("🚀 Execute Neural Analysis", variant="primary")

        with gr.Column():
            gr.Markdown("### 📊 2. VLM Perception Result")
            output_label = gr.Label(num_top_classes=3, label="Neural Confidence Score")

            with gr.Accordion("🔍 View Supported Vocabulary", open=True):
                gr.Markdown(", ".join(supported_glosses))

    # Link Dropdown to Video Player
    dataset_drop.change(fn=update_video_display, inputs=dataset_drop, outputs=video_comp)

    # Link Analyze Button to Private API
    run_btn.click(fn=run_omnisign_vlm, inputs=video_comp, outputs=output_label)

if __name__ == "__main__":
    # Disabling ssr_mode resolves the "Invalid file descriptor" issue in asyncio
    demo.launch(ssr_mode=False)