arshtech committed on
Commit
16412d8
·
verified ·
1 Parent(s): ba003f4

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +88 -0
  2. gestures_rules.json +31 -0
  3. requirements.txt +7 -0
  4. styles.css +1 -0
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import cv2
4
+ import mediapipe as mp
5
+ import numpy as np
6
+ import json
7
+ import time
8
+ from gtts import gTTS
9
+ import tempfile
10
+ import requests
11
+ from PIL import Image
12
+ from io import BytesIO
13
+
14
# Load the gesture lookup table once at startup.
# Each entry maps a letter name to a 5-element binary "finger extended"
# pattern (thumb..pinky) as produced by get_finger_states().
# Explicit UTF-8 so the read does not depend on the platform's locale default.
with open("gestures_rules.json", "r", encoding="utf-8") as f:
    gesture_data = json.load(f)["gestures"]
16
+
17
# Single shared MediaPipe Hands detector, reused for every streamed frame.
# max_num_hands=1 keeps the gesture-to-letter mapping unambiguous; the 0.7
# confidence thresholds trade some sensitivity for fewer false detections.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.7, min_tracking_confidence=0.7)
19
+
20
def get_finger_states(hand_landmarks):
    """Return a 5-element list of extension flags, thumb through pinky.

    A finger counts as extended (1) when its tip landmark sits above its
    lower joint landmark in image coordinates (smaller y = higher on screen).

    NOTE(review): the thumb is judged by the same vertical test as the other
    fingers (tip 4 vs landmark 2); thumbs extend sideways, so an x-axis
    comparison is usually more reliable — confirm against the patterns in
    gestures_rules.json before changing this.
    """
    lm = hand_landmarks.landmark
    states = []
    for tip_idx, joint_idx in ((4, 2), (8, 6), (12, 10), (16, 14), (20, 18)):
        states.append(1 if lm[tip_idx].y < lm[joint_idx].y else 0)
    return states
24
+
25
def detect_gesture(states, rules=None):
    """Return the name of the first gesture whose pattern matches *states*.

    Args:
        states: 5-element list of 0/1 finger-extension flags (thumb..pinky).
        rules: optional mapping of name -> {"pattern": [...]}. Defaults to
            the module-level table loaded from gestures_rules.json; passing
            it explicitly makes the function testable and reusable.

    Returns:
        The matching gesture name, or None when no pattern matches.

    Note:
        Insertion order decides ties. gestures_rules.json contains duplicate
        patterns (e.g. "A" and "S" share [0,0,0,0,0]), so only the first of
        each duplicate group can ever be returned.
    """
    if rules is None:
        rules = gesture_data
    for name, rule in rules.items():
        if rule["pattern"] == states:
            return name
    return None
30
+
31
def speak_text(text):
    """Synthesize *text* to speech and return the path of a temporary MP3.

    The Gradio Audio component consumes the file by path, so the temp file is
    created with delete=False (the caller/OS is responsible for cleanup).
    The handle is closed before gTTS writes to the path: keeping it open
    leaks a file descriptor and makes the write fail on Windows, where an
    open NamedTemporaryFile cannot be reopened by name.
    """
    tts = gTTS(text=text)
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    tmp.close()  # release the handle; only the reserved path is needed
    tts.save(tmp.name)
    return tmp.name
36
+
37
def process(frame, sentence, last_char, last_time):
    """Consume one webcam frame and append any recognized letter to *sentence*.

    Returns the (mirrored) frame plus the updated
    (sentence, last_char, last_time) state triple. A letter is only accepted
    when it differs from the previous one and at least one second has passed,
    which debounces the stream of repeated per-frame detections.
    """
    if frame is None:
        return frame, sentence, last_char, last_time

    frame = cv2.flip(frame, 1)  # mirror so the preview behaves like a mirror
    # NOTE(review): this conversion assumes the incoming frame uses OpenCV's
    # BGR channel order; Gradio webcam frames are typically RGB — confirm
    # which order MediaPipe actually receives here.
    result = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    if result.multi_hand_landmarks:
        finger_states = get_finger_states(result.multi_hand_landmarks[0])
        letter = detect_gesture(finger_states)
        now = time.time()
        accepted = letter and letter != last_char and now - last_time > 1
        if accepted and len(letter) == 1:
            sentence += letter
            last_char = letter
            last_time = now

    return frame, sentence, last_char, last_time
52
+
53
def speak(sentence):
    """Return an MP3 path voicing *sentence*, or None when it is blank."""
    if not sentence.strip():
        return None
    return speak_text(sentence)
55
+
56
def clear():
    """Reset the UI state: empty sentence, no last character, zeroed timestamp."""
    empty_sentence = ""
    no_last_char = ""
    epoch = 0.0
    return empty_sentence, no_last_char, epoch
58
+
59
# Fetch the project logo at startup. A hung or failed download must not
# prevent the app from launching, so the request is bounded by a timeout and
# any network/decode failure falls back to no logo (gr.Image renders empty
# for None).
try:
    _logo_resp = requests.get(
        "https://raw.githubusercontent.com/imarshbir/Hand2Voice/main/logo.png",
        timeout=10,
    )
    _logo_resp.raise_for_status()
    logo = Image.open(BytesIO(_logo_resp.content))
except (requests.RequestException, OSError):
    logo = None
62
+
63
# Load the custom stylesheet injected into the Gradio Blocks app.
# Explicit UTF-8 so the read does not depend on the platform's locale default.
with open("styles.css", encoding="utf-8") as f:
    css = f.read()
65
+
66
# Gradio UI: webcam stream -> process() updates the sentence state, buttons
# trigger speech synthesis and state reset.
# NOTE(review): gr.Image(source="webcam", streaming=True) is the Gradio 3.x
# API (removed in Gradio 4, which uses sources=["webcam"]) — confirm the
# installed Gradio version matches.
with gr.Blocks(css=css, title="Hand2Voice") as demo:
    gr.Image(logo, show_label=False, height=100)
    gr.HTML("<div class='mission-box'><div class='mission-title'>Bridging Communication Gaps with AI</div><div class='mission-text'>Sign language to speech in real time.</div></div>")

    with gr.Row():
        # Live webcam feed (numpy frames) and the recognized-text panel.
        cam = gr.Image(source="webcam", streaming=True, type="numpy")
        out = gr.HTML("<div class='output-text'>🤚 Waiting for gestures...</div>")

    speak_btn = gr.Button("🔊 Speak Sentence")
    clear_btn = gr.Button("🧹 Clear Text")
    audio = gr.Audio(autoplay=True)

    # Per-session state threaded through process(): accumulated sentence,
    # last accepted letter, and the timestamp of its acceptance (debounce).
    sentence = gr.State("")
    last_char = gr.State("")
    last_time = gr.State(0.0)

    # Each streamed frame runs process(), then re-renders the sentence HTML.
    cam.stream(process, [cam, sentence, last_char, last_time], [cam, sentence, last_char, last_time])\
        .then(lambda s: f"<div class='output-text'>{s}</div>", sentence, out)

    speak_btn.click(speak, sentence, audio)
    clear_btn.click(clear, outputs=[sentence, last_char, last_time])

demo.launch()
gestures_rules.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ {
3
+ "gestures": {
4
+ "A": { "pattern": [0,0,0,0,0] },
5
+ "B": { "pattern": [0,1,1,1,1] },
6
+ "C": { "pattern": [1,1,1,1,0] },
7
+ "D": { "pattern": [0,1,0,0,0] },
8
+ "E": { "pattern": [0,0,0,0,1] },
9
+ "F": { "pattern": [1,0,1,1,1] },
10
+ "G": { "pattern": [1,1,0,0,0] },
11
+ "H": { "pattern": [0,1,1,0,0] },
12
+ "I": { "pattern": [0,0,0,0,1] },
13
+ "J": { "pattern": [1,0,0,0,1] },
14
+ "K": { "pattern": [1,1,1,0,0] },
15
+ "L": { "pattern": [1,1,0,0,0] },
16
+ "M": { "pattern": [0,1,1,1,0] },
17
+ "N": { "pattern": [0,1,1,0,0] },
18
+ "O": { "pattern": [1,1,1,1,1] },
19
+ "P": { "pattern": [1,1,0,1,0] },
20
+ "Q": { "pattern": [1,1,0,0,1] },
21
+ "R": { "pattern": [0,1,1,0,1] },
22
+ "S": { "pattern": [0,0,0,0,0] },
23
+ "T": { "pattern": [1,0,0,0,0] },
24
+ "U": { "pattern": [0,1,1,0,0] },
25
+ "V": { "pattern": [0,1,1,0,1] },
26
+ "W": { "pattern": [0,1,1,1,0] },
27
+ "X": { "pattern": [0,1,0,0,1] },
28
+ "Y": { "pattern": [1,0,0,0,1] },
29
+ "Z": { "pattern": [1,1,0,0,0] }
30
+ }
31
+ }
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio<4
2
+ opencv-python-headless
3
+ mediapipe
4
+ numpy
5
+ gTTS
6
+ Pillow
7
+ requests
styles.css ADDED
@@ -0,0 +1 @@
 
 
1
+ /* Custom CSS from user (unchanged) */