Update app.py
Browse files
app.py
CHANGED
|
@@ -5,7 +5,6 @@ import time
|
|
| 5 |
from streamlit_webrtc import webrtc_streamer, VideoTransformerBase
|
| 6 |
import av
|
| 7 |
from queue import Queue
|
| 8 |
-
import os
|
| 9 |
|
| 10 |
# MediaPipe components for gesture recognition and hand landmarks
|
| 11 |
mp_hands = mp.solutions.hands
|
|
@@ -22,38 +21,52 @@ model_path = 'model/gesture_recognizer.task'
|
|
| 22 |
if not os.path.exists(model_path):
|
| 23 |
raise FileNotFoundError(f"Model file not found at {model_path}")
|
| 24 |
|
| 25 |
-
# Gesture recognizer configuration
|
| 26 |
-
options = GestureRecognizerOptions(
|
| 27 |
-
base_options=BaseOptions(model_asset_path=model_path),
|
| 28 |
-
running_mode=VisionRunningMode.LIVE_STREAM
|
| 29 |
-
)
|
| 30 |
-
|
| 31 |
# Custom video processor class for handling webcam input
|
| 32 |
class VideoProcessor(VideoTransformerBase):
|
| 33 |
def __init__(self):
|
| 34 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
self.hands = mp_hands.Hands(
|
| 36 |
max_num_hands=2,
|
| 37 |
model_complexity=0,
|
| 38 |
min_detection_confidence=0.3,
|
| 39 |
min_tracking_confidence=0.3
|
| 40 |
)
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
def recv(self, frame):
|
| 44 |
img = frame.to_ndarray(format="bgr24")
|
| 45 |
-
img = cv2.flip(img, 1) # Flip
|
| 46 |
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
| 47 |
|
| 48 |
# Perform hand landmark detection
|
| 49 |
hand_results = self.hands.process(img_rgb)
|
| 50 |
|
| 51 |
-
# Perform gesture recognition
|
| 52 |
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=img_rgb)
|
| 53 |
current_time_ms = int(time.time() * 1000)
|
| 54 |
-
|
| 55 |
|
| 56 |
-
# Draw landmarks
|
| 57 |
if hand_results.multi_hand_landmarks:
|
| 58 |
for hand_landmarks in hand_results.multi_hand_landmarks:
|
| 59 |
mp_drawing.draw_landmarks(
|
|
@@ -64,12 +77,19 @@ class VideoProcessor(VideoTransformerBase):
|
|
| 64 |
mp_drawing_styles.get_default_hand_connections_style()
|
| 65 |
)
|
| 66 |
|
| 67 |
-
#
|
| 68 |
-
if
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
return av.VideoFrame.from_ndarray(img, format="bgr24")
|
| 75 |
|
|
|
|
| 5 |
from streamlit_webrtc import webrtc_streamer, VideoTransformerBase
|
| 6 |
import av
|
| 7 |
from queue import Queue
|
|
|
|
| 8 |
|
| 9 |
# MediaPipe components for gesture recognition and hand landmarks
|
| 10 |
mp_hands = mp.solutions.hands
|
|
|
|
| 21 |
# Fail fast at import time: the gesture-recognizer model asset must be on
# disk before any VideoProcessor tries to build a recognizer from it.
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model file not found at {model_path}")
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
# Custom video processor class for handling webcam input
|
| 25 |
class VideoProcessor(VideoTransformerBase):
|
| 26 |
def __init__(self):
    """Set up per-stream MediaPipe resources.

    Creates a thread-safe queue bridging the recognizer's async callback
    back to the video thread, a synchronous hand-landmark tracker, and a
    live-stream gesture recognizer whose results arrive via
    ``handle_result``.
    """
    # Recognition results are produced on MediaPipe's callback thread;
    # recv() drains this queue on the video thread.
    self.gesture_queue = Queue()  # Queue to store gesture results

    # Hand-landmark tracker, tuned light (complexity 0, low thresholds)
    # for live video.
    self.hands = mp_hands.Hands(
        max_num_hands=2,
        model_complexity=0,
        min_detection_confidence=0.3,
        min_tracking_confidence=0.3
    )

    # Gesture recognizer in LIVE_STREAM mode: recognize_async() returns
    # immediately and results are delivered to the result_callback.
    recognizer_options = GestureRecognizerOptions(
        base_options=BaseOptions(model_asset_path=model_path),
        running_mode=VisionRunningMode.LIVE_STREAM,
        result_callback=self.handle_result,  # Set the result callback
    )
    self.recognizer = GestureRecognizer.create_from_options(recognizer_options)
|
| 43 |
+
|
| 44 |
+
def handle_result(self, result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
    """Async callback invoked by the GestureRecognizer (LIVE_STREAM mode).

    Formats every recognized gesture as ``name (Confidence: x.xx)`` and
    pushes the resulting list onto ``gesture_queue`` so recv() can draw
    it on a later frame. ``output_image`` and ``timestamp_ms`` are part
    of the required callback signature and are not used here.
    """
    if result.gestures:
        # result.gestures is a list per detected hand, each holding
        # ranked gesture candidates.
        labels = [
            f"{gesture.category_name} (Confidence: {gesture.score:.2f})"
            for hand_gestures in result.gestures
            for gesture in hand_gestures
        ]
    else:
        labels = ["No gestures detected."]
    self.gesture_queue.put(labels)
|
| 55 |
|
| 56 |
def recv(self, frame):
|
| 57 |
img = frame.to_ndarray(format="bgr24")
|
| 58 |
+
img = cv2.flip(img, 1) # Flip horizontally
|
| 59 |
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
| 60 |
|
| 61 |
# Perform hand landmark detection
|
| 62 |
hand_results = self.hands.process(img_rgb)
|
| 63 |
|
| 64 |
+
# Perform gesture recognition asynchronously
|
| 65 |
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=img_rgb)
|
| 66 |
current_time_ms = int(time.time() * 1000)
|
| 67 |
+
self.recognizer.recognize_async(mp_image, current_time_ms)
|
| 68 |
|
| 69 |
+
# Draw hand landmarks on the frame
|
| 70 |
if hand_results.multi_hand_landmarks:
|
| 71 |
for hand_landmarks in hand_results.multi_hand_landmarks:
|
| 72 |
mp_drawing.draw_landmarks(
|
|
|
|
| 77 |
mp_drawing_styles.get_default_hand_connections_style()
|
| 78 |
)
|
| 79 |
|
| 80 |
+
# Optionally display detected gestures from the queue
|
| 81 |
+
if not self.gesture_queue.empty():
|
| 82 |
+
gestures = self.gesture_queue.get()
|
| 83 |
+
for i, gesture in enumerate(gestures):
|
| 84 |
+
cv2.putText(
|
| 85 |
+
img,
|
| 86 |
+
gesture,
|
| 87 |
+
(10, 50 + i * 30),
|
| 88 |
+
cv2.FONT_HERSHEY_SIMPLEX,
|
| 89 |
+
1,
|
| 90 |
+
(0, 255, 0),
|
| 91 |
+
2,
|
| 92 |
+
)
|
| 93 |
|
| 94 |
return av.VideoFrame.from_ndarray(img, format="bgr24")
|
| 95 |
|