# ChatGS / gesture_detection.py
# Author: sharmamohit8624 (Hugging Face Space)
# Last update: gesture_detection.py — commit c4fe1db (verified)
# HF rebuild trigger
import joblib
try:
import keyboard
except ImportError:
keyboard = None
try:
import pyautogui
except ImportError:
pyautogui = None
from tensorflow.keras.models import load_model
# import keyboard
from collections import deque
import time
import numpy as np
import mediapipe as mp
import cv2
import warnings
warnings.filterwarnings(
"ignore",
message="SymbolDatabase.GetPrototype() is deprecated.*",
category=UserWarning,
module="google.protobuf.symbol_database"
)
class GestureControl:
    """Webcam-driven controller that types ASL letters or drives the mouse.

    Two modes, toggled by touching the thumb to the ring fingertip:
      * ASL mode (default): classifies the hand pose with the Keras model
        and types / accumulates the predicted character.
      * Gesture mode: moves the cursor with the index fingertip and maps
        simple poses to left/right click and volume keys.

    ``pyautogui`` and ``keyboard`` are optional (see the guarded imports at
    the top of the file). When either is unavailable (e.g. a headless host),
    the corresponding OS-level actions are skipped; recognized ASL characters
    are still accumulated in ``typed_text``.
    """

    def __init__(self, model_path='asl_model.h5', encoder_path='label_encoder.pkl'):
        """Load the ASL model/encoder and initialize detector and state.

        Args:
            model_path: Path to the Keras .h5 ASL classifier.
            encoder_path: Path to the pickled sklearn LabelEncoder.
        """
        # Load ASL model and the label encoder it was trained with.
        self.model = load_model(model_path, compile=False)
        self.le = joblib.load(encoder_path)

        # MediaPipe hand-landmark detector.
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            min_detection_confidence=0.7,
            min_tracking_confidence=0.7
        )

        # Screen metrics for mouse mapping. pyautogui may be None (headless
        # import guard at top of file) — fall back to a nominal screen size
        # so the rest of the pipeline still runs.
        if pyautogui is not None:
            self.screen_w, self.screen_h = pyautogui.size()
        else:
            self.screen_w, self.screen_h = 1920, 1080

        # Mode flag: False = ASL typing, True = mouse/gesture control.
        self.gesture_mode = False
        self.switch_cooldown = 5.0      # seconds between mode toggles
        self.last_switch_time = 0
        self.queue = deque(maxlen=4)    # recent simple-gesture votes

        # Mouse smoothing / jitter suppression.
        self.prev_x = 0
        self.prev_y = 0
        self.smooth = 0.05              # low-pass factor for cursor motion
        self.deadzone = 0.05            # normalized movement threshold
        self.last_right = 0
        self.right_cooldown = 1.0       # seconds between right clicks

        # ASL typing rate limiting.
        self.asl_cooldown = 2.0         # seconds between typed characters
        self.last_asl = 0
        self.last_label = None
        self.typed_text = ""

        # Special ASL labels mapped to named keyboard keys.
        self.map = {'space': 'space',
                    'del': 'backspace', 'delete': 'backspace'}

        self.running = False
        self.cam = None

    def set_webcam(self, cam):
        """Attach a cv2.VideoCapture, releasing any previously attached one."""
        if self.cam and self.cam.isOpened():
            self.cam.release()
        self.cam = cam

    def start(self):
        """Run the capture loop until stop() is called or the camera fails.

        Raises:
            RuntimeError: If no open webcam was attached via set_webcam().
        """
        if not (self.cam and self.cam.isOpened()):
            raise RuntimeError("Call set_webcam() first.")
        self.typed_text = ""
        self.running = True
        self._loop()

    def stop(self):
        """Stop the loop and release the camera."""
        self.running = False
        if self.cam and self.cam.isOpened():
            self.cam.release()
        self.cam = None

    def get_typed_text(self):
        """Return all ASL characters accumulated since start()."""
        return self.typed_text

    def _normalize(self, flat):
        """Translate landmarks to the wrist and scale by the hand's extent.

        Args:
            flat: Flat coordinate list [x0, y0, z0, x1, y1, z1, ...]; the
                first point is assumed to be the wrist.

        Returns:
            Flat numpy array of the same length, wrist-relative and scaled
            so the farthest point is at distance 1.
        """
        w = flat[:3]
        pts = [[flat[i] - w[0], flat[i + 1] - w[1], flat[i + 2] - w[2]]
               for i in range(0, len(flat), 3)]
        arr = np.array(pts)
        m = np.max(np.linalg.norm(arr - arr[0], axis=1))
        # Guard a degenerate (all-coincident) hand: avoid division by zero.
        return (arr / m).flatten() if m > 0 else arr.flatten()

    def _finger_states(self, lm):
        """Return [thumb, index, middle, ring, pinky] extension booleans."""
        H = self.mp_hands
        # Thumb: tip right of IP joint counts as extended (frame is mirrored
        # before processing, see _loop).
        st = [lm[H.HandLandmark.THUMB_TIP].x > lm[H.HandLandmark.THUMB_IP].x]
        # Other fingers: tip above the PIP joint (smaller y) means extended.
        for tip, pip in [(H.HandLandmark.INDEX_FINGER_TIP, H.HandLandmark.INDEX_FINGER_PIP),
                         (H.HandLandmark.MIDDLE_FINGER_TIP,
                          H.HandLandmark.MIDDLE_FINGER_PIP),
                         (H.HandLandmark.RING_FINGER_TIP,
                          H.HandLandmark.RING_FINGER_PIP),
                         (H.HandLandmark.PINKY_TIP, H.HandLandmark.PINKY_PIP)]:
            st.append(lm[tip].y < lm[pip].y)
        return st

    def _detect_simple(self, lm):
        """Classify one frame's landmarks into a simple control gesture.

        Returns:
            One of 'thumb_ring', 'Pinch', 'Right Click', 'Volume Up',
            'Volume Down', or None if no simple gesture matches.
        """
        st = self._finger_states(lm)
        H = self.mp_hands
        # Thumb touching ring fingertip (index+middle up, ring+pinky down)
        # is the mode-toggle gesture.
        d = np.linalg.norm([
            lm[H.HandLandmark.THUMB_TIP].x -
            lm[H.HandLandmark.RING_FINGER_TIP].x,
            lm[H.HandLandmark.THUMB_TIP].y -
            lm[H.HandLandmark.RING_FINGER_TIP].y
        ])
        if d < 0.05 and st[1] and st[2] and not st[3] and not st[4]:
            return 'thumb_ring'
        # Thumb-index pinch = left click.
        dp = np.linalg.norm([
            lm[H.HandLandmark.THUMB_TIP].x -
            lm[H.HandLandmark.INDEX_FINGER_TIP].x,
            lm[H.HandLandmark.THUMB_TIP].y -
            lm[H.HandLandmark.INDEX_FINGER_TIP].y
        ])
        if dp < 0.05 and st[1]:
            return 'Pinch'
        # Index+middle fingertips together = right click.
        dr = np.linalg.norm([
            lm[H.HandLandmark.INDEX_FINGER_TIP].x -
            lm[H.HandLandmark.MIDDLE_FINGER_TIP].x,
            lm[H.HandLandmark.INDEX_FINGER_TIP].y -
            lm[H.HandLandmark.MIDDLE_FINGER_TIP].y
        ])
        if dr < 0.06 and st[1] and st[2] and not st[3] and not st[4]:
            return 'Right Click'
        # Thumb only = volume up, pinky only = volume down.
        if st == [True, False, False, False, False]:
            return 'Volume Up'
        if st == [False, False, False, False, True]:
            return 'Volume Down'
        return None

    def _loop(self):
        """Main capture/recognition loop; runs until stopped or ESC pressed.

        NOTE(fix): the original body re-imported pyautogui/keyboard here with
        bare imports, which crashed on hosts where the guarded top-of-file
        imports had set them to None. All OS-level calls are now gated on the
        module-level names instead.
        """
        while self.running and self.cam.isOpened():
            ret, frame = self.cam.read()
            if not ret:
                break
            frame = cv2.flip(frame, 1)  # mirror so motion matches the user
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            res = self.hands.process(rgb)
            now = time.time()
            simple = None
            if res.multi_hand_landmarks:
                hand = res.multi_hand_landmarks[0]
                lm = hand.landmark
                simple = self._detect_simple(lm)
                self.queue.append(simple)
                # Mode toggle: require 3 of the last 4 frames to agree, and
                # rate-limit toggles with switch_cooldown.
                if now - self.last_switch_time > self.switch_cooldown and self.queue.count('thumb_ring') >= 3:
                    self.gesture_mode = not self.gesture_mode
                    print(
                        f"Mode now: {'Gesture' if self.gesture_mode else 'ASL'}")
                    self.last_switch_time = now
                    self.queue.clear()
                if self.gesture_mode:
                    # Cursor follows the index fingertip, smoothed and
                    # deadzoned to suppress jitter.
                    tip = lm[self.mp_hands.HandLandmark.INDEX_FINGER_TIP]
                    x = int(tip.x * self.screen_w)
                    y = int(tip.y * self.screen_h)
                    dx = abs(tip.x - (self.prev_x / self.screen_w))
                    dy = abs(tip.y - (self.prev_y / self.screen_h))
                    if dx > self.deadzone or dy > self.deadzone:
                        nx = int(self.prev_x + (x - self.prev_x) * self.smooth)
                        ny = int(self.prev_y + (y - self.prev_y) * self.smooth)
                        if pyautogui is not None:
                            pyautogui.moveTo(nx, ny)
                        self.prev_x, self.prev_y = nx, ny
                    if pyautogui is not None:
                        if simple == 'Pinch':
                            pyautogui.click()
                        elif simple == 'Right Click' and now - self.last_right > self.right_cooldown:
                            pyautogui.click(button='right')
                            self.last_right = now
                        elif simple == 'Volume Up':
                            pyautogui.press('volumeup')
                        elif simple == 'Volume Down':
                            pyautogui.press('volumedown')
                else:
                    # ASL mode: classify at most once per cooldown window.
                    flat = [c for p in lm for c in (p.x, p.y, p.z)]
                    if now - self.last_asl > self.asl_cooldown:
                        data = np.array([self._normalize(flat)])
                        pred = self.model.predict(data, verbose=0)[0]
                        idx = np.argmax(pred)
                        label = self.le.inverse_transform([idx])[0]
                        print(f"ASL: {label}")
                        # Only act when the label changes, so a held pose
                        # doesn't repeat-type.
                        if label != self.last_label:
                            if label in self.map:
                                if pyautogui is not None:
                                    pyautogui.press(self.map[label])
                            elif len(label) == 1:
                                if pyautogui is not None:
                                    pyautogui.typewrite(label)
                                # Accumulate text even without pyautogui so
                                # headless callers can read it back.
                                self.typed_text += label
                            self.last_label = label
                        self.last_asl = now
            # ESC aborts the loop (only when the keyboard hook is available).
            if keyboard is not None and keyboard.is_pressed('esc'):
                break
        self.running = False
        if self.cam and self.cam.isOpened():
            self.cam.release()
        print("GestureControl stopped.")