aziraarshad committed on
Commit
6d3215d
·
verified ·
1 Parent(s): 14d749f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -14
app.py CHANGED
@@ -5,6 +5,7 @@ import gradio as gr
5
  import torch
6
  import torch.nn as nn
7
  import mediapipe as mp
 
8
 
9
  # ----------------------------
10
  # Load labels (labels.json)
@@ -252,23 +253,33 @@ def run(frame, sequence_state):
252
 
253
  return out_rgb, probs_dict, sequence_state
254
 
255
- with gr.Blocks() as demo:
256
- gr.Markdown("# Live Sign Language Gesture Demo (CNN-LSTM + Multi-Head Attention)")
257
- gr.Markdown("Show your hand gesture to the webcam. Prediction starts after 30 frames are collected.")
258
 
259
- seq_state = gr.State([])
 
260
 
261
- with gr.Row():
262
- cam = gr.Webcam(streaming=True, label="Webcam")
263
- out_img = gr.Image(type="numpy", label="Output (Annotated)")
264
 
265
- out_label = gr.Label(num_top_classes=5, label="Probabilities (Top 5)")
 
 
266
 
267
- cam.stream(
268
- fn=run,
269
- inputs=[cam, seq_state],
270
- outputs=[out_img, out_label, seq_state],
271
- )
 
 
 
 
 
 
 
 
 
272
 
273
  if __name__ == "__main__":
274
- demo.launch()
 
5
  import torch
6
  import torch.nn as nn
7
  import mediapipe as mp
8
+ from PIL import Image
9
 
10
  # ----------------------------
11
  # Load labels (labels.json)
 
253
 
254
  return out_rgb, probs_dict, sequence_state
255
 
256
+ # Legacy Gradio-compatible state
257
+ sequence_state = []
 
258
 
259
+ def predict(frame):
260
+ global sequence_state
261
 
262
+ if frame is None:
263
+ return None, {"(no frame)": 1.0}
 
264
 
265
+ # Sometimes legacy Gradio provides PIL
266
+ if isinstance(frame, Image.Image):
267
+ frame = np.array(frame)
268
 
269
+ out_img, probs_dict, sequence_state = run(frame, sequence_state)
270
+ return out_img, probs_dict
271
+
272
+ demo = gr.Interface(
273
+ fn=predict,
274
+ inputs=gr.inputs.Image(source="webcam", type="numpy", label="Webcam"),
275
+ outputs=[
276
+ gr.outputs.Image(type="numpy", label="Output (Annotated)"),
277
+ gr.outputs.Label(num_top_classes=5, label="Probabilities (Top 5)"),
278
+ ],
279
+ title="Live Sign Language Gesture Demo (CNN-LSTM + Multi-Head Attention)",
280
+ description="Show your hand gesture to the webcam. Prediction starts after 30 frames are collected.",
281
+ live=True,
282
+ )
283
 
284
  if __name__ == "__main__":
285
+ demo.launch()