import tempfile

import cv2
import gradio as gr
import mediapipe as mp

from micro_gestures import fist_closed, palm_open, thumb_up
from composite_gestures import detect_composite_gesture
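# Assumed contracts for the imported helpers (defined in their own modules):
#   fist_closed / palm_open / thumb_up take a list of 21 (x, y, z) tuples
#   (normalized MediaPipe hand landmarks) and return a bool.
#   detect_composite_gesture takes the recent micro-gesture labels and
#   returns a composite gesture name, or a falsy value when nothing matches.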
# initialize MediaPipe modules
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose  # for the body-pose half of the app (unused below)
mp_drawing = mp.solutions.drawing_utils

# hand tracker; all arguments except min_tracking_confidence are
# MediaPipe's documented defaults
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=2,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)
def process_video(video_path, target_width=640):
    # open the input video file
    cap = cv2.VideoCapture(video_path)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    temp_output = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    temp_output.close()   # only the path is needed; VideoWriter opens it itself
    out = None            # created lazily once the output frame size is known
    sequence_buffer = []  # sliding window of recent micro-gesture labels
    last_label = "None"   # most recent composite gesture detected
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        h0, w0 = frame.shape[:2]
        # resize frame, keeping the aspect ratio
        scale = target_width / float(w0)
        target_height = int(round(h0 * scale))
        frame_small = cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_AREA)
        # convert BGR -> RGB for MediaPipe
        rgb_small = cv2.cvtColor(frame_small, cv2.COLOR_BGR2RGB)
        # hand detection
        hand_results = hands.process(rgb_small)
        micro_label = ""
        if hand_results.multi_hand_landmarks:
            for hand_landmarks in hand_results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame_small, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                landmarks = [(lm.x, lm.y, lm.z) for lm in hand_landmarks.landmark]
                if fist_closed(landmarks):
                    micro_label = "fist_closed"
                elif palm_open(landmarks):
                    # label matched to the detector; adjust if
                    # detect_composite_gesture expects a different vocabulary
                    micro_label = "palm_open"
                elif thumb_up(landmarks):
                    micro_label = "thumb_up"
        if micro_label:
            sequence_buffer.append(micro_label)
            if len(sequence_buffer) > 5:
                sequence_buffer.pop(0)  # keep only the 5 most recent labels
        # detect composite gesture from the micro-gesture sequence
        if sequence_buffer:
            detected = detect_composite_gesture(sequence_buffer)
            if detected:
                last_label = detected
        # initialize the video writer lazily, once the output frame size is known
        if out is None:
            fps = cap.get(cv2.CAP_PROP_FPS)
            if fps <= 0 or fps > 120:
                fps = 30  # assumed fallback when the container reports no/bogus FPS
            out = cv2.VideoWriter(
                temp_output.name, fourcc, fps,
                (frame_small.shape[1], frame_small.shape[0])
            )
        # write the processed frame
        out.write(frame_small)

    cap.release()
    if out:
        out.release()
    # return the output video path and the last detected label
    return temp_output.name, last_label
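# Example usage outside Gradio (hypothetical local file):
#   out_path, gesture = process_video("demo.mp4", target_width=640)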
# gradio interface
iface = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(label="Upload or Record Video"),
        gr.Slider(minimum=160, maximum=1280, value=640, step=20, label="Processing Width")
    ],
    outputs=[
        gr.Video(label="Processed Video with Landmarks"),
        gr.Textbox(label="Detected Gesture", interactive=False)
    ],
    title="Hand & Body Pose Detection",
    description="Upload or record a video and watch MediaPipe draw hand landmarks on each frame. The detected gesture label is shown below the video."
)
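# process_video returns (output_path, last_label); Gradio maps the pair onto
# the two outputs above in order.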
if __name__ == "__main__":
    iface.launch()