# vsign/app.py — Hugging Face Space entry point.
# Provenance (from the hosting page): uploaded by mkfallah, commit d0c8524 (verified), "Update app.py".
import tempfile

import cv2
import gradio as gr
import mediapipe as mp

from micro_gestures import *
from composite_gestures import detect_composite_gesture

# Initialize MediaPipe solution modules.
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose  # imported but unused in the visible code; kept for compatibility
mp_drawing = mp.solutions.drawing_utils

# Hand-landmark detector shared by process_video() for every frame.
# NOTE(review): the constructor head was truncated in the source — only the
# min_tracking_confidence argument survived. min_detection_confidence=0.5 is
# MediaPipe's documented default; confirm against the original commit.
hands = mp_hands.Hands(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)
def process_video(video_path, target_width=640):
    """Annotate a video with MediaPipe hand landmarks and detect gestures.

    Args:
        video_path: Path to the input video file.
        target_width: Width in pixels each frame is resized to before
            processing; height scales to preserve aspect ratio.

    Returns:
        Tuple of (output_path, label): path to the annotated ``.mp4`` and
        the last composite gesture label detected ("None" if none was).
    """
    cap = cv2.VideoCapture(video_path)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    temp_output = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    out = None             # created lazily once the first frame's size is known
    sequence_buffer = []   # rolling window of recent micro-gesture labels
    last_label = "None"    # last composite gesture detected

    while cap.isOpened():
        ret, frame = cap.read()
        # BUG FIX: the source broke out of the loop unconditionally after the
        # first read, so at most one frame was ever processed. Stop only when
        # a frame cannot be read (end of stream or decode failure).
        if not ret:
            break

        h0, w0 = frame.shape[:2]
        # Resize keeping the aspect ratio.
        scale = target_width / float(w0)
        target_height = int(round(h0 * scale))
        frame_small = cv2.resize(frame, (target_width, target_height),
                                 interpolation=cv2.INTER_AREA)

        # MediaPipe expects RGB input; OpenCV delivers BGR.
        rgb_small = cv2.cvtColor(frame_small, cv2.COLOR_BGR2RGB)
        hand_results = hands.process(rgb_small)

        micro_label = ""
        if hand_results.multi_hand_landmarks:
            for hand_landmarks in hand_results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame_small, hand_landmarks,
                                          mp_hands.HAND_CONNECTIONS)
                landmarks = [(lm.x, lm.y, lm.z) for lm in hand_landmarks.landmark]
                if fist_closed(landmarks):
                    micro_label = "fist_closed"
                elif palm_open(landmarks):
                    # NOTE(review): palm_open() mapping to "index_up" looks
                    # mismatched, but detect_composite_gesture() may depend on
                    # this exact label — confirm before renaming.
                    micro_label = "index_up"
                elif thumb_up(landmarks):
                    micro_label = "thumb_up"

        if micro_label:
            sequence_buffer.append(micro_label)
            if len(sequence_buffer) > 5:
                sequence_buffer.pop(0)  # keep only the 5 most recent labels

        # Detect a composite gesture from the micro-gesture sequence.
        if sequence_buffer:
            detected = detect_composite_gesture(sequence_buffer)
            if detected:
                last_label = detected

        # Lazily initialize the writer with the processed frame's dimensions.
        # (Reconstructed: the source's writer construction was truncated.)
        if out is None:
            fps = cap.get(cv2.CAP_PROP_FPS)
            if fps <= 0 or fps > 120:
                fps = 30.0  # fall back when container metadata is missing or implausible
            out = cv2.VideoWriter(
                temp_output.name, fourcc, fps,
                (frame_small.shape[1], frame_small.shape[0])
            )

        out.write(frame_small)

    cap.release()
    if out:
        out.release()
    # Return the annotated video path and the last detected gesture label.
    return temp_output.name, last_label
# Gradio UI: one video input plus a processing-width slider in, the
# annotated video and the detected gesture label out.
iface = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(label="Upload or Record Video"),
        gr.Slider(minimum=160, maximum=1280, value=640, step=20,
                  label="Processing Width"),
    ],
    outputs=[
        gr.Video(label="Processed Video with Landmarks"),
        gr.Textbox(label="Detected Gesture", interactive=False),
    ],
    title="Hand & Body Pose Detection",
    description="Upload or record a video, see MediaPipe detect hand landmarks. Gesture label is shown below the video.",
)
if __name__ == "__main__":