# ChatGS / gesture_detection.py
# Author: sharmamohit8624 (Hugging Face Space)
# Last update: gesture_detection.py — commit c4fe1db (verified)
# HF rebuild trigger
import joblib
try:
import keyboard
except ImportError:
keyboard = None
try:
import pyautogui
except ImportError:
pyautogui = None
from tensorflow.keras.models import load_model
# import keyboard
from collections import deque
import time
import numpy as np
import mediapipe as mp
import cv2
import warnings
warnings.filterwarnings(
"ignore",
message="SymbolDatabase.GetPrototype() is deprecated.*",
category=UserWarning,
module="google.protobuf.symbol_database"
)
class GestureControl:
    """Webcam-driven controller that types ASL letters or drives the mouse.

    Two modes, toggled by touching the thumb to the ring fingertip:
      * ASL mode (default): classifies the hand pose with the Keras model
        and types / accumulates the predicted character.
      * Gesture mode: moves the cursor with the index fingertip and maps
        simple poses to left/right click and volume keys.

    ``pyautogui`` and ``keyboard`` are optional (see the guarded imports at
    the top of the file). When either is unavailable (e.g. a headless host),
    the corresponding OS-level actions are skipped; recognized ASL characters
    are still accumulated in ``typed_text``.
    """

    def __init__(self, model_path='asl_model.h5', encoder_path='label_encoder.pkl'):
        """Load the ASL model/encoder and initialize detector and state.

        Args:
            model_path: Path to the Keras .h5 ASL classifier.
            encoder_path: Path to the pickled sklearn LabelEncoder.
        """
        # Load ASL model and the label encoder it was trained with.
        self.model = load_model(model_path, compile=False)
        self.le = joblib.load(encoder_path)

        # MediaPipe hand-landmark detector.
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            min_detection_confidence=0.7,
            min_tracking_confidence=0.7
        )

        # Screen metrics for mouse mapping. pyautogui may be None (headless
        # import guard at top of file) — fall back to a nominal screen size
        # so the rest of the pipeline still runs.
        if pyautogui is not None:
            self.screen_w, self.screen_h = pyautogui.size()
        else:
            self.screen_w, self.screen_h = 1920, 1080

        # Mode flag: False = ASL typing, True = mouse/gesture control.
        self.gesture_mode = False
        self.switch_cooldown = 5.0      # seconds between mode toggles
        self.last_switch_time = 0
        self.queue = deque(maxlen=4)    # recent simple-gesture votes

        # Mouse smoothing / jitter suppression.
        self.prev_x = 0
        self.prev_y = 0
        self.smooth = 0.05              # low-pass factor for cursor motion
        self.deadzone = 0.05            # normalized movement threshold
        self.last_right = 0
        self.right_cooldown = 1.0       # seconds between right clicks

        # ASL typing rate limiting.
        self.asl_cooldown = 2.0         # seconds between typed characters
        self.last_asl = 0
        self.last_label = None
        self.typed_text = ""

        # Special ASL labels mapped to named keyboard keys.
        self.map = {'space': 'space',
                    'del': 'backspace', 'delete': 'backspace'}

        self.running = False
        self.cam = None

    def set_webcam(self, cam):
        """Attach a cv2.VideoCapture, releasing any previously attached one."""
        if self.cam and self.cam.isOpened():
            self.cam.release()
        self.cam = cam

    def start(self):
        """Run the capture loop until stop() is called or the camera fails.

        Raises:
            RuntimeError: If no open webcam was attached via set_webcam().
        """
        if not (self.cam and self.cam.isOpened()):
            raise RuntimeError("Call set_webcam() first.")
        self.typed_text = ""
        self.running = True
        self._loop()

    def stop(self):
        """Stop the loop and release the camera."""
        self.running = False
        if self.cam and self.cam.isOpened():
            self.cam.release()
        self.cam = None

    def get_typed_text(self):
        """Return all ASL characters accumulated since start()."""
        return self.typed_text

    def _normalize(self, flat):
        """Translate landmarks to the wrist and scale by the hand's extent.

        Args:
            flat: Flat coordinate list [x0, y0, z0, x1, y1, z1, ...]; the
                first point is assumed to be the wrist.

        Returns:
            Flat numpy array of the same length, wrist-relative and scaled
            so the farthest point is at distance 1.
        """
        w = flat[:3]
        pts = [[flat[i] - w[0], flat[i + 1] - w[1], flat[i + 2] - w[2]]
               for i in range(0, len(flat), 3)]
        arr = np.array(pts)
        m = np.max(np.linalg.norm(arr - arr[0], axis=1))
        # Guard a degenerate (all-coincident) hand: avoid division by zero.
        return (arr / m).flatten() if m > 0 else arr.flatten()

    def _finger_states(self, lm):
        """Return [thumb, index, middle, ring, pinky] extension booleans."""
        H = self.mp_hands
        # Thumb: tip right of IP joint counts as extended (frame is mirrored
        # before processing, see _loop).
        st = [lm[H.HandLandmark.THUMB_TIP].x > lm[H.HandLandmark.THUMB_IP].x]
        # Other fingers: tip above the PIP joint (smaller y) means extended.
        for tip, pip in [(H.HandLandmark.INDEX_FINGER_TIP, H.HandLandmark.INDEX_FINGER_PIP),
                         (H.HandLandmark.MIDDLE_FINGER_TIP,
                          H.HandLandmark.MIDDLE_FINGER_PIP),
                         (H.HandLandmark.RING_FINGER_TIP,
                          H.HandLandmark.RING_FINGER_PIP),
                         (H.HandLandmark.PINKY_TIP, H.HandLandmark.PINKY_PIP)]:
            st.append(lm[tip].y < lm[pip].y)
        return st

    def _detect_simple(self, lm):
        """Classify one frame's landmarks into a simple control gesture.

        Returns:
            One of 'thumb_ring', 'Pinch', 'Right Click', 'Volume Up',
            'Volume Down', or None if no simple gesture matches.
        """
        st = self._finger_states(lm)
        H = self.mp_hands
        # Thumb touching ring fingertip (index+middle up, ring+pinky down)
        # is the mode-toggle gesture.
        d = np.linalg.norm([
            lm[H.HandLandmark.THUMB_TIP].x -
            lm[H.HandLandmark.RING_FINGER_TIP].x,
            lm[H.HandLandmark.THUMB_TIP].y -
            lm[H.HandLandmark.RING_FINGER_TIP].y
        ])
        if d < 0.05 and st[1] and st[2] and not st[3] and not st[4]:
            return 'thumb_ring'
        # Thumb-index pinch = left click.
        dp = np.linalg.norm([
            lm[H.HandLandmark.THUMB_TIP].x -
            lm[H.HandLandmark.INDEX_FINGER_TIP].x,
            lm[H.HandLandmark.THUMB_TIP].y -
            lm[H.HandLandmark.INDEX_FINGER_TIP].y
        ])
        if dp < 0.05 and st[1]:
            return 'Pinch'
        # Index+middle fingertips together = right click.
        dr = np.linalg.norm([
            lm[H.HandLandmark.INDEX_FINGER_TIP].x -
            lm[H.HandLandmark.MIDDLE_FINGER_TIP].x,
            lm[H.HandLandmark.INDEX_FINGER_TIP].y -
            lm[H.HandLandmark.MIDDLE_FINGER_TIP].y
        ])
        if dr < 0.06 and st[1] and st[2] and not st[3] and not st[4]:
            return 'Right Click'
        # Thumb only = volume up, pinky only = volume down.
        if st == [True, False, False, False, False]:
            return 'Volume Up'
        if st == [False, False, False, False, True]:
            return 'Volume Down'
        return None

    def _loop(self):
        """Main capture/recognition loop; runs until stopped or ESC pressed.

        NOTE(fix): the original body re-imported pyautogui/keyboard here with
        bare imports, which crashed on hosts where the guarded top-of-file
        imports had set them to None. All OS-level calls are now gated on the
        module-level names instead.
        """
        while self.running and self.cam.isOpened():
            ret, frame = self.cam.read()
            if not ret:
                break
            frame = cv2.flip(frame, 1)  # mirror so motion matches the user
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            res = self.hands.process(rgb)
            now = time.time()
            simple = None
            if res.multi_hand_landmarks:
                hand = res.multi_hand_landmarks[0]
                lm = hand.landmark
                simple = self._detect_simple(lm)
                self.queue.append(simple)
                # Mode toggle: require 3 of the last 4 frames to agree, and
                # rate-limit toggles with switch_cooldown.
                if now - self.last_switch_time > self.switch_cooldown and self.queue.count('thumb_ring') >= 3:
                    self.gesture_mode = not self.gesture_mode
                    print(
                        f"Mode now: {'Gesture' if self.gesture_mode else 'ASL'}")
                    self.last_switch_time = now
                    self.queue.clear()
                if self.gesture_mode:
                    # Cursor follows the index fingertip, smoothed and
                    # deadzoned to suppress jitter.
                    tip = lm[self.mp_hands.HandLandmark.INDEX_FINGER_TIP]
                    x = int(tip.x * self.screen_w)
                    y = int(tip.y * self.screen_h)
                    dx = abs(tip.x - (self.prev_x / self.screen_w))
                    dy = abs(tip.y - (self.prev_y / self.screen_h))
                    if dx > self.deadzone or dy > self.deadzone:
                        nx = int(self.prev_x + (x - self.prev_x) * self.smooth)
                        ny = int(self.prev_y + (y - self.prev_y) * self.smooth)
                        if pyautogui is not None:
                            pyautogui.moveTo(nx, ny)
                        self.prev_x, self.prev_y = nx, ny
                    if pyautogui is not None:
                        if simple == 'Pinch':
                            pyautogui.click()
                        elif simple == 'Right Click' and now - self.last_right > self.right_cooldown:
                            pyautogui.click(button='right')
                            self.last_right = now
                        elif simple == 'Volume Up':
                            pyautogui.press('volumeup')
                        elif simple == 'Volume Down':
                            pyautogui.press('volumedown')
                else:
                    # ASL mode: classify at most once per cooldown window.
                    flat = [c for p in lm for c in (p.x, p.y, p.z)]
                    if now - self.last_asl > self.asl_cooldown:
                        data = np.array([self._normalize(flat)])
                        pred = self.model.predict(data, verbose=0)[0]
                        idx = np.argmax(pred)
                        label = self.le.inverse_transform([idx])[0]
                        print(f"ASL: {label}")
                        # Only act when the label changes, so a held pose
                        # doesn't repeat-type.
                        if label != self.last_label:
                            if label in self.map:
                                if pyautogui is not None:
                                    pyautogui.press(self.map[label])
                            elif len(label) == 1:
                                if pyautogui is not None:
                                    pyautogui.typewrite(label)
                                # Accumulate text even without pyautogui so
                                # headless callers can read it back.
                                self.typed_text += label
                            self.last_label = label
                        self.last_asl = now
            # ESC aborts the loop (only when the keyboard hook is available).
            if keyboard is not None and keyboard.is_pressed('esc'):
                break
        self.running = False
        if self.cam and self.cam.isOpened():
            self.cam.release()
        print("GestureControl stopped.")