arshtech committed on
Commit
b37ba50
·
verified ·
1 Parent(s): c261f0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -15
app.py CHANGED
@@ -8,7 +8,6 @@ import json
8
  import time
9
  from gtts import gTTS
10
  import tempfile
11
- from PIL import Image
12
  import mediapipe as mp
13
  from mediapipe import solutions
14
 
@@ -19,7 +18,6 @@ with open("gestures_rules.json", "r") as f:
19
  # ---------------- MEDIAPIPE (CPU ONLY) ----------------
20
  mp_hands = solutions.hands
21
  hands = mp_hands.Hands(
22
- static_image_mode=False,
23
  max_num_hands=1,
24
  min_detection_confidence=0.7,
25
  min_tracking_confidence=0.7
@@ -36,9 +34,9 @@ def get_finger_states(hand_landmarks):
36
  ]
37
 
38
  def detect_gesture(states):
39
- for k, v in gesture_data.items():
40
- if v["pattern"] == states:
41
- return k
42
  return None
43
 
44
  def speak_text(text):
@@ -47,8 +45,8 @@ def speak_text(text):
47
  tts.save(f.name)
48
  return f.name
49
 
50
- # ---------------- VIDEO PROCESS ----------------
51
- def process_video(frame, sentence, last_char, last_time):
52
  if frame is None:
53
  return frame, sentence, last_char, last_time
54
 
@@ -85,14 +83,14 @@ with gr.Blocks(title="Hand2Voice") as demo:
85
  gr.Markdown("## 🀟 Hand2Voice – Gesture to Speech")
86
 
87
  with gr.Row():
88
- with gr.Column(scale=1):
89
- webcam = gr.Video(
90
- sources=["webcam"],
91
- streaming=True,
92
- height=360
93
  )
94
 
95
- with gr.Column(scale=1):
96
  output = gr.HTML("<h3>Waiting for gestures...</h3>")
97
  speak_btn = gr.Button("πŸ”Š Speak")
98
  clear_btn = gr.Button("🧹 Clear")
@@ -102,8 +100,8 @@ with gr.Blocks(title="Hand2Voice") as demo:
102
  last_char = gr.State("")
103
  last_time = gr.State(0.0)
104
 
105
- webcam.stream(
106
- process_video,
107
  inputs=[webcam, sentence, last_char, last_time],
108
  outputs=[webcam, sentence, last_char, last_time]
109
  ).then(
 
8
  import time
9
  from gtts import gTTS
10
  import tempfile
 
11
  import mediapipe as mp
12
  from mediapipe import solutions
13
 
 
18
  # ---------------- MEDIAPIPE (CPU ONLY) ----------------
19
  mp_hands = solutions.hands
20
  hands = mp_hands.Hands(
 
21
  max_num_hands=1,
22
  min_detection_confidence=0.7,
23
  min_tracking_confidence=0.7
 
34
  ]
35
 
36
  def detect_gesture(states):
37
+ for name, rule in gesture_data.items():
38
+ if rule["pattern"] == states:
39
+ return name
40
  return None
41
 
42
  def speak_text(text):
 
45
  tts.save(f.name)
46
  return f.name
47
 
48
+ # ---------------- FRAME PROCESS ----------------
49
+ def process_frame(frame, sentence, last_char, last_time):
50
  if frame is None:
51
  return frame, sentence, last_char, last_time
52
 
 
83
  gr.Markdown("## 🀟 Hand2Voice – Gesture to Speech")
84
 
85
  with gr.Row():
86
+ with gr.Column():
87
+ webcam = gr.Image(
88
+ label="Webcam",
89
+ type="numpy",
90
+ live=True
91
  )
92
 
93
+ with gr.Column():
94
  output = gr.HTML("<h3>Waiting for gestures...</h3>")
95
  speak_btn = gr.Button("πŸ”Š Speak")
96
  clear_btn = gr.Button("🧹 Clear")
 
100
  last_char = gr.State("")
101
  last_time = gr.State(0.0)
102
 
103
+ webcam.change(
104
+ process_frame,
105
  inputs=[webcam, sentence, last_char, last_time],
106
  outputs=[webcam, sentence, last_char, last_time]
107
  ).then(