# vsign/app.py — Hugging Face Space entry point.
# Provenance (from the hosting page): uploaded by mkfallah, commit d0c8524 (verified), "Update app.py".
import tempfile

import cv2
import gradio as gr
import mediapipe as mp

from micro_gestures import *
from composite_gestures import detect_composite_gesture

# Initialize MediaPipe solution modules.
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose  # imported but unused in the visible code; kept for compatibility
mp_drawing = mp.solutions.drawing_utils

# Hand-landmark detector shared by process_video() for every frame.
# NOTE(review): the constructor head was truncated in the source — only the
# min_tracking_confidence argument survived. min_detection_confidence=0.5 is
# MediaPipe's documented default; confirm against the original commit.
hands = mp_hands.Hands(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)
def process_video(video_path, target_width=640):
    """Annotate a video with MediaPipe hand landmarks and detect gestures.

    Args:
        video_path: Path to the input video file.
        target_width: Width in pixels each frame is resized to before
            processing; height scales to preserve aspect ratio.

    Returns:
        Tuple of (output_path, label): path to the annotated ``.mp4`` and
        the last composite gesture label detected ("None" if none was).
    """
    cap = cv2.VideoCapture(video_path)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    temp_output = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    out = None             # created lazily once the first frame's size is known
    sequence_buffer = []   # rolling window of recent micro-gesture labels
    last_label = "None"    # last composite gesture detected

    while cap.isOpened():
        ret, frame = cap.read()
        # BUG FIX: the source broke out of the loop unconditionally after the
        # first read, so at most one frame was ever processed. Stop only when
        # a frame cannot be read (end of stream or decode failure).
        if not ret:
            break

        h0, w0 = frame.shape[:2]
        # Resize keeping the aspect ratio.
        scale = target_width / float(w0)
        target_height = int(round(h0 * scale))
        frame_small = cv2.resize(frame, (target_width, target_height),
                                 interpolation=cv2.INTER_AREA)

        # MediaPipe expects RGB input; OpenCV delivers BGR.
        rgb_small = cv2.cvtColor(frame_small, cv2.COLOR_BGR2RGB)
        hand_results = hands.process(rgb_small)

        micro_label = ""
        if hand_results.multi_hand_landmarks:
            for hand_landmarks in hand_results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame_small, hand_landmarks,
                                          mp_hands.HAND_CONNECTIONS)
                landmarks = [(lm.x, lm.y, lm.z) for lm in hand_landmarks.landmark]
                if fist_closed(landmarks):
                    micro_label = "fist_closed"
                elif palm_open(landmarks):
                    # NOTE(review): palm_open() mapping to "index_up" looks
                    # mismatched, but detect_composite_gesture() may depend on
                    # this exact label — confirm before renaming.
                    micro_label = "index_up"
                elif thumb_up(landmarks):
                    micro_label = "thumb_up"

        if micro_label:
            sequence_buffer.append(micro_label)
            if len(sequence_buffer) > 5:
                sequence_buffer.pop(0)  # keep only the 5 most recent labels

        # Detect a composite gesture from the micro-gesture sequence.
        if sequence_buffer:
            detected = detect_composite_gesture(sequence_buffer)
            if detected:
                last_label = detected

        # Lazily initialize the writer with the processed frame's dimensions.
        # (Reconstructed: the source's writer construction was truncated.)
        if out is None:
            fps = cap.get(cv2.CAP_PROP_FPS)
            if fps <= 0 or fps > 120:
                fps = 30.0  # fall back when container metadata is missing or implausible
            out = cv2.VideoWriter(
                temp_output.name, fourcc, fps,
                (frame_small.shape[1], frame_small.shape[0])
            )

        out.write(frame_small)

    cap.release()
    if out:
        out.release()
    # Return the annotated video path and the last detected gesture label.
    return temp_output.name, last_label
# Gradio UI: one video input plus a processing-width slider in, the
# annotated video and the detected gesture label out.
iface = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(label="Upload or Record Video"),
        gr.Slider(minimum=160, maximum=1280, value=640, step=20,
                  label="Processing Width"),
    ],
    outputs=[
        gr.Video(label="Processed Video with Landmarks"),
        gr.Textbox(label="Detected Gesture", interactive=False),
    ],
    title="Hand & Body Pose Detection",
    description="Upload or record a video, see MediaPipe detect hand landmarks. Gesture label is shown below the video.",
)
if __name__ == "__main__":