arshtech committed on
Commit
db6710f
·
verified ·
1 Parent(s): 7140eef

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +105 -0
  2. gesture_recognizer.py +19 -0
  3. requirements.txt +6 -3
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import cv2
3
+ import numpy as np
4
+ import requests
5
+ from gtts import gTTS
6
+ import tempfile
7
+ import pandas
8
+
9
+ # ---------------- STREAMLIT CONFIG ----------------
10
+ st.set_page_config(page_title="Hand2Voice", layout="wide")
11
+ st.title("🀟 Hand2Voice")
12
+ st.write("Hand Gesture to Voice Conversion")
13
+
14
+ # ---------------- CONSTANTS ----------------
15
+ GESTURE_URL = "https://raw.githubusercontent.com/imarshbir/Hand2Voice/main/gestures/gesture_rules.json"
16
+
17
# ---------------- LOAD GESTURES ----------------
@st.cache_data
def load_gestures():
    """Download and cache the gesture rule table.

    Returns the ``gestures`` mapping (gesture name -> {"pattern": [five 0/1
    flags]}) from the remote JSON rules file.  Cached by Streamlit so the
    network request runs only once per session.

    Raises:
        requests.HTTPError: if the rules file cannot be fetched.
    """
    # Timeout keeps the app from hanging forever on a dead connection;
    # raise_for_status surfaces HTTP errors instead of a confusing
    # JSONDecodeError on an error page body.
    response = requests.get(GESTURE_URL, timeout=10)
    response.raise_for_status()
    return response.json()["gestures"]
21
+
22
# ---------------- LAZY MEDIAPIPE ----------------
@st.cache_resource
def load_mediapipe():
    """Create and cache a single-hand MediaPipe Hands detector.

    mediapipe is imported lazily inside the function so the heavy package
    loads only when the detector is first needed; Streamlit caches the
    resulting detector object for the whole session.
    """
    import mediapipe as mp

    hands_module = mp.solutions.hands
    detector = hands_module.Hands(
        static_image_mode=True,      # inputs are still photos, not a video stream
        max_num_hands=1,
        min_detection_confidence=0.7,
    )
    return detector
32
+
33
# ---------------- FINGER LOGIC ----------------
def get_finger_states(hand_landmarks):
    """Return five 0/1 flags, one per finger (thumb first).

    1 means the finger reads as extended, 0 as folded:
      * thumb: its tip (landmark 4) lies to the right of the IP joint
        (landmark 3) on the x axis — orientation-dependent; presumably
        assumes one hand/camera pose (TODO confirm for mirrored input)
      * other fingers: the tip landmark sits above its PIP joint
        (smaller y, since image y grows downward).
    """
    lm = hand_landmarks.landmark

    thumb_open = int(lm[4].x > lm[3].x)
    # (tip, joint) landmark index pairs for index, middle, ring, pinky.
    finger_flags = [
        int(lm[tip].y < lm[joint].y)
        for tip, joint in ((8, 6), (12, 10), (16, 14), (20, 18))
    ]
    return [thumb_open] + finger_flags
52
+
53
# ---------------- MATCH GESTURE ----------------
def match_gesture(states, rules):
    """Look up *states* (list of five 0/1 finger flags) in the rule table.

    Returns the first gesture name whose ``"pattern"`` equals *states*,
    or ``"Unknown Gesture"`` when nothing matches.
    """
    matched = next(
        (name for name, spec in rules.items() if spec["pattern"] == states),
        None,
    )
    return matched if matched is not None else "Unknown Gesture"
59
+
60
# ---------------- RECOGNITION ----------------
def recognize_gesture(frame, hands, rules):
    """Detect a hand in *frame* (BGR image) and name its gesture.

    Returns the matched gesture name, "Unknown Gesture" for an
    unrecognized hand pose, or "No Hand Detected" when MediaPipe
    finds no hand at all.
    """
    detection = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    landmarks_list = detection.multi_hand_landmarks
    if not landmarks_list:
        return "No Hand Detected"

    # The detector is configured for at most one hand; classify the first.
    return match_gesture(get_finger_states(landmarks_list[0]), rules)
71
+
72
# ---------------- TEXT TO SPEECH ----------------
def speak_text(text):
    """Synthesize *text* to speech via Google TTS.

    Returns the path of a temporary ``.mp3`` file containing the audio.
    The caller (``st.audio``) reads the file by path, hence
    ``delete=False``; OS temp-dir cleanup handles the file afterwards.
    """
    tts = gTTS(text=text, lang="en")
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    # Close our handle before gTTS rewrites the path: the original kept
    # the handle open, which leaks a file descriptor and fails on
    # Windows (file locked by the open handle).
    tmp.close()
    tts.save(tmp.name)
    return tmp.name
78
+
79
# ---------------- UI ----------------
camera_col, output_col = st.columns(2)

with camera_col:
    st.subheader("📷 Camera")
    # st.camera_input yields an UploadedFile of JPEG bytes, or None.
    image = st.camera_input("Capture hand gesture")

with output_col:
    st.subheader("📝 Output")

    if image:
        rules = load_gestures()
        detector = load_mediapipe()

        # Decode the captured JPEG bytes into a BGR OpenCV frame.
        buffer = np.frombuffer(image.getvalue(), np.uint8)
        frame = cv2.imdecode(buffer, cv2.IMREAD_COLOR)

        gesture = recognize_gesture(frame, detector, rules)
        st.success(f"🔊 {gesture}")

        # Only speak real matches, never the failure sentinels.
        if gesture not in ["Unknown Gesture", "No Hand Detected"]:
            st.audio(speak_text(gesture))
    else:
        st.info("Capture an image to start")
gesture_recognizer.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ from hand_tracker import hands
3
+ from gesture_logic import get_finger_states
4
+ from gesture_matcher import match_gesture
5
+ from gesture_loader import load_gestures
6
+
7
# Rule table is loaded once at import time and shared by every call.
gesture_rules = load_gestures()


def recognize_gesture(frame):
    """Classify the hand gesture in *frame* (BGR image).

    Returns the matched gesture name, "Unknown Gesture" for an
    unmatched pose, or None when no hand is detected.
    NOTE(review): imports at the top of this module reference
    hand_tracker/gesture_logic/gesture_matcher/gesture_loader, which are
    not part of this upload — verify they exist before using this module.
    """
    detection = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    found = detection.multi_hand_landmarks
    if not found:
        return None

    return match_gesture(get_finger_states(found[0]), gesture_rules)
requirements.txt CHANGED
@@ -1,3 +1,6 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
1
+ streamlit
2
+ opencv-python-headless
3
+ mediapipe==0.10.9
4
+ numpy
5
+ requests
6
+ gTTS
7
+ pandas