Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ import gradio as gr
|
|
| 5 |
import torch
|
| 6 |
import torch.nn as nn
|
| 7 |
import mediapipe as mp
|
|
|
|
| 8 |
|
| 9 |
# ----------------------------
|
| 10 |
# Load labels (labels.json)
|
|
@@ -252,23 +253,33 @@ def run(frame, sequence_state):
|
|
| 252 |
|
| 253 |
return out_rgb, probs_dict, sequence_state
|
| 254 |
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
gr.Markdown("Show your hand gesture to the webcam. Prediction starts after 30 frames are collected.")
|
| 258 |
|
| 259 |
-
|
|
|
|
| 260 |
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
out_img = gr.Image(type="numpy", label="Output (Annotated)")
|
| 264 |
|
| 265 |
-
|
|
|
|
|
|
|
| 266 |
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
|
| 273 |
if __name__ == "__main__":
|
| 274 |
-
demo.launch()
|
|
|
|
| 5 |
import torch
|
| 6 |
import torch.nn as nn
|
| 7 |
import mediapipe as mp
|
| 8 |
+
from PIL import Image
|
| 9 |
|
| 10 |
# ----------------------------
|
| 11 |
# Load labels (labels.json)
|
|
|
|
| 253 |
|
| 254 |
return out_rgb, probs_dict, sequence_state
|
| 255 |
|
| 256 |
+
# Legacy Gradio-compatible state
|
| 257 |
+
sequence_state = []
|
|
|
|
| 258 |
|
| 259 |
+
def predict(frame):
|
| 260 |
+
global sequence_state
|
| 261 |
|
| 262 |
+
if frame is None:
|
| 263 |
+
return None, {"(no frame)": 1.0}
|
|
|
|
| 264 |
|
| 265 |
+
# Sometimes legacy Gradio provides PIL
|
| 266 |
+
if isinstance(frame, Image.Image):
|
| 267 |
+
frame = np.array(frame)
|
| 268 |
|
| 269 |
+
out_img, probs_dict, sequence_state = run(frame, sequence_state)
|
| 270 |
+
return out_img, probs_dict
|
| 271 |
+
|
| 272 |
+
demo = gr.Interface(
|
| 273 |
+
fn=predict,
|
| 274 |
+
inputs=gr.inputs.Image(source="webcam", type="numpy", label="Webcam"),
|
| 275 |
+
outputs=[
|
| 276 |
+
gr.outputs.Image(type="numpy", label="Output (Annotated)"),
|
| 277 |
+
gr.outputs.Label(num_top_classes=5, label="Probabilities (Top 5)"),
|
| 278 |
+
],
|
| 279 |
+
title="Live Sign Language Gesture Demo (CNN-LSTM + Multi-Head Attention)",
|
| 280 |
+
description="Show your hand gesture to the webcam. Prediction starts after 30 frames are collected.",
|
| 281 |
+
live=True,
|
| 282 |
+
)
|
| 283 |
|
| 284 |
if __name__ == "__main__":
|
| 285 |
+
demo.launch()
|