mkfallah committed on
Commit
4759b08
·
verified ·
1 Parent(s): 2b70dc1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -61
app.py CHANGED
@@ -2,26 +2,15 @@ import gradio as gr
2
  import cv2
3
  import mediapipe as mp
4
  import tempfile
 
 
5
 
6
- # initialize mediapipe modules
7
  mp_hands = mp.solutions.hands
8
  mp_pose = mp.solutions.pose
9
  mp_drawing = mp.solutions.drawing_utils
10
 
11
- hands = mp_hands.Hands(
12
- static_image_mode=False,
13
- max_num_hands=2,
14
- min_detection_confidence=0.5,
15
- min_tracking_confidence=0.5
16
- )
17
-
18
- pose = mp_pose.Pose(
19
- static_image_mode=False,
20
- model_complexity=1,
21
- enable_segmentation=False,
22
- min_detection_confidence=0.5,
23
- min_tracking_confidence=0.5
24
- )
25
 
26
  def process_video(video_path, target_width=640):
27
  cap = cv2.VideoCapture(video_path)
@@ -29,7 +18,7 @@ def process_video(video_path, target_width=640):
29
  temp_output = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
30
  out = None
31
 
32
- last_label = "None" # store last detected gesture label
33
 
34
  while cap.isOpened():
35
  ret, frame = cap.read()
@@ -37,77 +26,64 @@ def process_video(video_path, target_width=640):
37
  break
38
 
39
  h0, w0 = frame.shape[:2]
40
- # resize frame keeping aspect ratio
41
  scale = target_width / float(w0)
42
  target_height = int(round(h0 * scale))
43
  frame_small = cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_AREA)
44
-
45
- # convert to rgb
46
  rgb_small = cv2.cvtColor(frame_small, cv2.COLOR_BGR2RGB)
47
 
48
- # hand detection
49
  hand_results = hands.process(rgb_small)
 
50
  if hand_results.multi_hand_landmarks:
51
  for hand_landmarks in hand_results.multi_hand_landmarks:
52
- mp_drawing.draw_landmarks(
53
- frame_small,
54
- hand_landmarks,
55
- mp_hands.HAND_CONNECTIONS,
56
- mp_drawing.DrawingSpec(color=(0,0,255), thickness=1, circle_radius=1),
57
- mp_drawing.DrawingSpec(color=(0,255,0), thickness=1, circle_radius=1)
58
- )
59
-
60
- # pose detection
61
- pose_results = pose.process(rgb_small)
62
- if pose_results.pose_landmarks:
63
- mp_drawing.draw_landmarks(
64
- frame_small,
65
- pose_results.pose_landmarks,
66
- mp_pose.POSE_CONNECTIONS,
67
- mp_drawing.DrawingSpec(color=(255,0,0), thickness=1, circle_radius=1),
68
- mp_drawing.DrawingSpec(color=(0,255,255), thickness=1, circle_radius=1)
69
- )
70
-
71
- # here you would detect gesture label (example placeholder)
72
- # last_label = detect_composite_gesture([...])
73
- last_label = "example_label"
74
-
75
- # initialize video writer
76
  if out is None:
77
  fps = cap.get(cv2.CAP_PROP_FPS)
78
  if fps <= 0 or fps > 120:
79
  fps = 30
80
-
81
- out = cv2.VideoWriter(
82
- temp_output.name,
83
- fourcc,
84
- fps,
85
- (frame_small.shape[1], frame_small.shape[0])
86
- )
87
 
88
- # write processed frame
89
- out.write(frame_small)
 
 
 
 
 
 
90
 
91
  cap.release()
92
  if out:
93
  out.release()
94
 
95
- # return both video path and last label for gradio
96
- return temp_output.name, last_label
97
 
98
- # gradio interface
99
  iface = gr.Interface(
100
  fn=process_video,
101
  inputs=[
102
  gr.Video(label="Upload or Record Video"),
103
  gr.Slider(minimum=160, maximum=1280, value=640, step=20, label="Processing Width")
104
  ],
105
- outputs=[
106
- gr.Video(label="Processed Video with Landmarks"),
107
- gr.Textbox(label="Detected Gesture", interactive=False)
108
- ],
109
  title="Hand & Body Pose Detection",
110
- description="Upload or record a video, and see MediaPipe detect hand and body landmarks with connections."
111
  )
112
 
113
  if __name__ == "__main__":
 
2
  import cv2
3
  import mediapipe as mp
4
  import tempfile
5
+ from micro_gestures import *
6
+ from composite_gestures import detect_composite_gesture
7
 
 
8
  mp_hands = mp.solutions.hands
9
  mp_pose = mp.solutions.pose
10
  mp_drawing = mp.solutions.drawing_utils
11
 
12
+ hands = mp_hands.Hands(static_image_mode=False,max_num_hands=2,min_detection_confidence=0.5,min_tracking_confidence=0.5)
13
+ pose = mp_pose.Pose(static_image_mode=False,model_complexity=1,enable_segmentation=False,min_detection_confidence=0.5,min_tracking_confidence=0.5)
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  def process_video(video_path, target_width=640):
16
  cap = cv2.VideoCapture(video_path)
 
18
  temp_output = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
19
  out = None
20
 
21
+ sequence_buffer = []
22
 
23
  while cap.isOpened():
24
  ret, frame = cap.read()
 
26
  break
27
 
28
  h0, w0 = frame.shape[:2]
 
29
  scale = target_width / float(w0)
30
  target_height = int(round(h0 * scale))
31
  frame_small = cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_AREA)
 
 
32
  rgb_small = cv2.cvtColor(frame_small, cv2.COLOR_BGR2RGB)
33
 
 
34
  hand_results = hands.process(rgb_small)
35
+ micro_label = None
36
  if hand_results.multi_hand_landmarks:
37
  for hand_landmarks in hand_results.multi_hand_landmarks:
38
+ mp_drawing.draw_landmarks(frame_small, hand_landmarks, mp_hands.HAND_CONNECTIONS)
39
+ # detect micro-gestures
40
+ if fist_closed(hand_landmarks.landmark):
41
+ micro_label = "fist_closed"
42
+ elif palm_open(hand_landmarks.landmark):
43
+ micro_label = "palm_open"
44
+ elif index_pointing_up(hand_landmarks.landmark):
45
+ micro_label = "index_up"
46
+ elif thumb_up(hand_landmarks.landmark):
47
+ micro_label = "thumb_up"
48
+ # add label to buffer
49
+ if micro_label:
50
+ sequence_buffer.append(micro_label)
51
+ if len(sequence_buffer) > 5:
52
+ sequence_buffer.pop(0)
53
+
54
+ # detect composite gesture
55
+ composite_label = detect_composite_gesture(sequence_buffer)
56
+
 
 
 
 
 
57
  if out is None:
58
  fps = cap.get(cv2.CAP_PROP_FPS)
59
  if fps <= 0 or fps > 120:
60
  fps = 30
61
+ out = cv2.VideoWriter(temp_output.name, fourcc, fps, (frame_small.shape[1], frame_small.shape[0]))
 
 
 
 
 
 
62
 
63
+ # write label under frame
64
+ if composite_label:
65
+ # create separate label image
66
+ label_img = cv2.putText(frame_small.copy(), composite_label, (10, target_height-10),
67
+ cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2, cv2.LINE_AA)
68
+ out.write(label_img)
69
+ else:
70
+ out.write(frame_small)
71
 
72
  cap.release()
73
  if out:
74
  out.release()
75
 
76
+ return temp_output.name
 
77
 
 
78
  iface = gr.Interface(
79
  fn=process_video,
80
  inputs=[
81
  gr.Video(label="Upload or Record Video"),
82
  gr.Slider(minimum=160, maximum=1280, value=640, step=20, label="Processing Width")
83
  ],
84
+ outputs=gr.Video(label="Processed Video with Gestures"),
 
 
 
85
  title="Hand & Body Pose Detection",
86
+ description="Upload or record a video, see MediaPipe detect hand landmarks and composite gestures."
87
  )
88
 
89
  if __name__ == "__main__":