Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,48 +6,54 @@ from huggingface_hub import hf_hub_download, list_repo_files
|
|
| 6 |
|
| 7 |
# 1. SECRETS & BACKEND LINK
|
| 8 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 9 |
-
|
|
|
|
| 10 |
|
| 11 |
-
# 2.
|
| 12 |
-
|
| 13 |
-
"ADAPT", "ADD", "
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
]
|
|
|
|
| 18 |
|
| 19 |
-
# 3. DATASET DISCOVERY
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
vid_id = os.path.basename(f_path).replace(".mp4", "")
|
| 28 |
-
# Filter for samples that match our supported list (for a clean demo)
|
| 29 |
-
if any(vid_id in str(s) for s in ["00944", "00963", "00335", "00689", "00842", "01064", "00416", "00947", "00377", "00832"]):
|
| 30 |
-
gloss_name = [g for g in SUPPORTED_GLOSSES if g.startswith(vid_id[1]) or g.endswith(vid_id[-1])][0] # Simple heuristic
|
| 31 |
-
dataset_options[f"{gloss_name} (Sample {vid_id})"] = f_path
|
| 32 |
-
except Exception as e:
|
| 33 |
-
dataset_options = {}
|
| 34 |
|
| 35 |
# 4. INITIALIZE CLIENT
|
|
|
|
| 36 |
try:
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
| 39 |
client = None
|
| 40 |
|
| 41 |
-
|
| 42 |
# 5. LOGIC FUNCTIONS
|
| 43 |
def update_video_display(selection):
|
| 44 |
-
|
| 45 |
-
if not selection: return None, None
|
| 46 |
try:
|
| 47 |
-
# Extract Ground Truth from dropdown display name
|
| 48 |
gloss_gt = selection.split('(')[0].strip()
|
| 49 |
|
| 50 |
-
# Download video file to /tmp for local playback
|
| 51 |
hf_path = dataset_options[selection]
|
| 52 |
cache_path = hf_hub_download(repo_id="Voxel51/WLASL", filename=hf_path, repo_type="dataset")
|
| 53 |
local_path = os.path.join("/tmp", os.path.basename(hf_path))
|
|
@@ -55,25 +61,29 @@ def update_video_display(selection):
|
|
| 55 |
|
| 56 |
return local_path, f"Ground Truth: {gloss_gt}"
|
| 57 |
except Exception as e:
|
| 58 |
-
return None, f"Error: {e}"
|
| 59 |
|
| 60 |
def run_omnisign_vlm(video_path):
|
| 61 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 62 |
if not video_path: return {"Error": "No input detected."}
|
| 63 |
if not client: return {"Neural Engine Offline": 0.0}
|
| 64 |
-
|
| 65 |
try:
|
| 66 |
-
#
|
| 67 |
-
#
|
|
|
|
| 68 |
result = client.predict(
|
| 69 |
-
handle_file(video_path),
|
| 70 |
-
api_name="/predict_sign"
|
| 71 |
)
|
| 72 |
return result
|
| 73 |
except Exception as e:
|
| 74 |
-
return {f"Neural
|
| 75 |
|
| 76 |
-
# 6. UI DESIGN
|
| 77 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 78 |
gr.Markdown(f"""
|
| 79 |
# 🧠 OmniSign VLM: Universal SL Protocol
|
|
@@ -86,7 +96,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 86 |
- **Lexical Agnostic protocol:** Capable of instant updates to any sign language (Universal SL).
|
| 87 |
|
| 88 |
---
|
| 89 |
-
*Notice: This
|
| 90 |
""")
|
| 91 |
|
| 92 |
with gr.Row():
|
|
@@ -100,7 +110,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 100 |
value=""
|
| 101 |
)
|
| 102 |
|
| 103 |
-
# Ground Truth Display
|
| 104 |
gt_output = gr.Textbox(label="Ground Truth", interactive=False, value="Select a sample above to view its Ground Truth.")
|
| 105 |
|
| 106 |
run_btn = gr.Button("π Execute Neural Analysis", variant="primary")
|
|
@@ -110,8 +119,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 110 |
output_label = gr.Label(num_top_classes=3, label="VLM Confidence Output")
|
| 111 |
|
| 112 |
with gr.Accordion("π View Supported Vocabulary List", open=True):
|
| 113 |
-
gr.Markdown(f"**This demo subset recognizes {len(
|
| 114 |
-
gr.Markdown(", ".join(
|
| 115 |
|
| 116 |
# Event Mapping
|
| 117 |
dataset_drop.change(fn=update_video_display, inputs=dataset_drop, outputs=[video_comp, gt_output])
|
|
|
|
| 6 |
|
| 7 |
# 1. SECRETS & BACKEND LINK
|
| 8 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 9 |
+
# Make sure this matches your private space URL exactly
|
| 10 |
+
PRIVATE_SPACE = "st192011/ASL-VLS-Private"
|
| 11 |
|
| 12 |
+
# 2. DEFINITIVE SUPPORTED VOCABULARY LIST
|
| 13 |
+
SUPPORTED_VIDEOS = [
|
| 14 |
+
("00944", "ADAPT"), ("00963", "ADD"), ("01064", "ADJECTIVE"), ("00335", "ABDOMEN"),
|
| 15 |
+
("00689", "ACCOUNTANT"), ("00899", "ACTOR"), ("00584", "ACCENT"), ("00632", "ACCIDENT"),
|
| 16 |
+
("00586", "ACCENT"), ("00585", "ACCENT"), ("00626", "ACCIDENT"), ("00623", "ACCIDENT"),
|
| 17 |
+
("00846", "ACT"), ("00890", "ACTIVITY"), ("00898", "ACTOR"), ("01011", "ADDRESS"),
|
| 18 |
+
("00834", "ACROSS"), ("00624", "ACCIDENT"), ("00593", "ACCEPT"), ("00415", "ABOUT"),
|
| 19 |
+
("00961", "ADD"), ("00962", "ADD"), ("00594", "ACCEPT"), ("00964", "ADD"),
|
| 20 |
+
("00666", "ACCOMPLISH"), ("01065", "ADJECTIVE"), ("00628", "ACCIDENT"), ("00868", "ACTIVE"),
|
| 21 |
+
("00836", "ACROSS"), ("00430", "ABOVE"), ("00835", "ACROSS"), ("00946", "ADAPT"),
|
| 22 |
+
("00943", "ADAPT"), ("00414", "ABOUT"), ("00376", "ABLE"), ("00832", "ACROSS"),
|
| 23 |
+
("00627", "ACCIDENT"), ("00592", "ACCEPT"), ("00625", "ACCIDENT"), ("01012", "ADDRESS"),
|
| 24 |
+
("00849", "ACT"), ("00663", "ACCOMPLISH"), ("00853", "ACTION"), ("00967", "ADD"),
|
| 25 |
+
("00692", "ACCOUNTANT"), ("00583", "ACCENT"), ("00341", "ACROSS"), ("00378", "ADDRESS"),
|
| 26 |
+
("00433", "ADJECTIVE"), ("00384", "ACTOR"), ("00381", "ACTOR"), ("00377", "ACCIDENT"),
|
| 27 |
+
("00382", "ACTOR"), ("00378", "ADDRESS")
|
| 28 |
]
|
| 29 |
+
SUPPORTED_GLOSSES_UNIQUE = sorted(list(set([g for _, g in SUPPORTED_VIDEOS])))
|
| 30 |
|
| 31 |
+
# 3. DATASET DISCOVERY AND MAPPING
|
| 32 |
+
print("Dataset Discovery: Mapping specific video IDs to Glosses...")
|
| 33 |
+
dataset_options = {}
|
| 34 |
+
for vid_id, gloss in SUPPORTED_VIDEOS:
|
| 35 |
+
# Construct the full HF path (assuming 5-digit ID)
|
| 36 |
+
hf_path = f"data/data_0/{vid_id.zfill(5)}.mp4"
|
| 37 |
+
display_name = f"{gloss} (Sample {vid_id})"
|
| 38 |
+
dataset_options[display_name] = hf_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
# 4. INITIALIZE CLIENT
|
| 41 |
+
print(f"π Attempting connection to {PRIVATE_SPACE}...")
|
| 42 |
try:
|
| 43 |
+
# Use 'token=' (standard) instead of 'hf_token='
|
| 44 |
+
client = Client(PRIVATE_SPACE, token=HF_TOKEN)
|
| 45 |
+
print("✅ Neural Engine Online!")
|
| 46 |
+
except Exception as e:
|
| 47 |
+
print(f"❌ Connection Failed: {e}")
|
| 48 |
client = None
|
| 49 |
|
|
|
|
| 50 |
# 5. LOGIC FUNCTIONS
|
| 51 |
def update_video_display(selection):
|
| 52 |
+
if not selection: return None, None
|
|
|
|
| 53 |
try:
|
|
|
|
| 54 |
gloss_gt = selection.split('(')[0].strip()
|
| 55 |
|
| 56 |
+
# Download the video file to /tmp for local playback
|
| 57 |
hf_path = dataset_options[selection]
|
| 58 |
cache_path = hf_hub_download(repo_id="Voxel51/WLASL", filename=hf_path, repo_type="dataset")
|
| 59 |
local_path = os.path.join("/tmp", os.path.basename(hf_path))
|
|
|
|
| 61 |
|
| 62 |
return local_path, f"Ground Truth: {gloss_gt}"
|
| 63 |
except Exception as e:
|
| 64 |
+
return None, f"Error downloading sample: {e}"
|
| 65 |
|
| 66 |
def run_omnisign_vlm(video_path):
|
| 67 |
+
"""
|
| 68 |
+
Submits the video to the private backend.
|
| 69 |
+
CRITICAL: Must use positional arguments for handle_file().
|
| 70 |
+
"""
|
| 71 |
if not video_path: return {"Error": "No input detected."}
|
| 72 |
if not client: return {"Neural Engine Offline": 0.0}
|
| 73 |
+
|
| 74 |
try:
|
| 75 |
+
# --- THE FIX IS HERE ---
|
| 76 |
+
# We pass handle_file(video_path) as the FIRST argument (positional).
|
| 77 |
+
# We do NOT use 'video_file=' or 'video=' as a keyword.
|
| 78 |
result = client.predict(
|
| 79 |
+
handle_file(video_path),
|
| 80 |
+
api_name="/predict_sign"
|
| 81 |
)
|
| 82 |
return result
|
| 83 |
except Exception as e:
|
| 84 |
+
return {f"Neural Analysis Failed: {str(e)}": 0.0}
|
| 85 |
|
| 86 |
+
# 6. UI DESIGN
|
| 87 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 88 |
gr.Markdown(f"""
|
| 89 |
# 🧠 OmniSign VLM: Universal SL Protocol
|
|
|
|
| 96 |
- **Lexical Agnostic protocol:** Capable of instant updates to any sign language (Universal SL).
|
| 97 |
|
| 98 |
---
|
| 99 |
+
*Notice: This demonstration uses an unoptimized, limited vocabulary subset for structural proof-of-concept.*
|
| 100 |
""")
|
| 101 |
|
| 102 |
with gr.Row():
|
|
|
|
| 110 |
value=""
|
| 111 |
)
|
| 112 |
|
|
|
|
| 113 |
gt_output = gr.Textbox(label="Ground Truth", interactive=False, value="Select a sample above to view its Ground Truth.")
|
| 114 |
|
| 115 |
run_btn = gr.Button("π Execute Neural Analysis", variant="primary")
|
|
|
|
| 119 |
output_label = gr.Label(num_top_classes=3, label="VLM Confidence Output")
|
| 120 |
|
| 121 |
with gr.Accordion("π View Supported Vocabulary List", open=True):
|
| 122 |
+
gr.Markdown(f"**This demo subset recognizes {len(SUPPORTED_GLOSSES_UNIQUE)} unique words:**")
|
| 123 |
+
gr.Markdown(", ".join(SUPPORTED_GLOSSES_UNIQUE))
|
| 124 |
|
| 125 |
# Event Mapping
|
| 126 |
dataset_drop.change(fn=update_video_display, inputs=dataset_drop, outputs=[video_comp, gt_output])
|