|
|
import tempfile

import cv2
import gradio as gr
import mediapipe as mp

from micro_gestures import fist_closed, palm_open, thumb_up
from composite_gestures import detect_composite_gesture

mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose  # currently unused below
mp_drawing = mp.solutions.drawing_utils

# max_num_hands and the detection threshold are assumed settings; tune as needed.
hands = mp_hands.Hands(
    max_num_hands=2,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)
|
|
|
|
|
def process_video(video_path, target_width=640): |
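    """Run MediaPipe hand tracking on each frame of the input video, draw the
    landmarks, and map per-frame micro-gestures to a composite gesture.

    Returns a tuple of (path to the annotated mp4, last detected gesture label).
    """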
|
|
|
|
|
    cap = cv2.VideoCapture(video_path)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    temp_output = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    out = None  # created lazily once the processed frame size is known

    sequence_buffer = []  # rolling window of recent micro-gesture labels
    last_label = "None"

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Downscale to the requested processing width, preserving aspect ratio.
        h0, w0 = frame.shape[:2]
        scale = target_width / float(w0)
        target_height = int(round(h0 * scale))
        frame_small = cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_AREA)

        # MediaPipe expects RGB input; OpenCV decodes frames as BGR.
        rgb_small = cv2.cvtColor(frame_small, cv2.COLOR_BGR2RGB)
        hand_results = hands.process(rgb_small)

        micro_label = ""
        if hand_results.multi_hand_landmarks:
            for hand_landmarks in hand_results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame_small, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                landmarks = [(lm.x, lm.y, lm.z) for lm in hand_landmarks.landmark]
                # Classify this frame into a single micro-gesture label.
                if fist_closed(landmarks):
                    micro_label = "fist_closed"
                elif palm_open(landmarks):
                    micro_label = "palm_open"
                elif thumb_up(landmarks):
                    micro_label = "thumb_up"

        if micro_label:
            sequence_buffer.append(micro_label)
            if len(sequence_buffer) > 5:  # keep only the five most recent labels
                sequence_buffer.pop(0)

        if sequence_buffer:
            detected = detect_composite_gesture(sequence_buffer)
            if detected:
                last_label = detected

        # Create the writer lazily so it matches the processed frame size.
        if out is None:
            fps = cap.get(cv2.CAP_PROP_FPS)
            if fps <= 0 or fps > 120:
                fps = 30.0  # fall back to a sensible default when metadata is missing or bogus
            out = cv2.VideoWriter(
                temp_output.name,
                fourcc,
                fps,
                (frame_small.shape[1], frame_small.shape[0])
            )

        out.write(frame_small)

    cap.release()
    if out:
        out.release()

    return temp_output.name, last_label
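
# Hypothetical standalone use (the file name here is illustrative):
#   annotated_path, gesture = process_video("clip.mp4", target_width=480)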
|
|
|
|
|
|
|
|
|
|
|
iface = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(label="Upload or Record Video"),
        gr.Slider(minimum=160, maximum=1280, value=640, step=20, label="Processing Width")
    ],
    outputs=[
        gr.Video(label="Processed Video with Landmarks"),
        gr.Textbox(label="Detected Gesture", interactive=False)
    ],
    title="Hand & Body Pose Detection",
    description="Upload or record a video and watch MediaPipe draw hand landmarks. The detected gesture label is shown below the video."
)
|
|
|
|
|
if __name__ == "__main__":
    iface.launch()