vu0018 committed
Commit 7f28013 · verified · Parent: 9a13995

Update app.py

Files changed (1)
  1. app.py +18 -34
app.py CHANGED
@@ -19,11 +19,7 @@ action_model = pipeline(
 def detect_pose_and_activity(video_file):
     """
     Process the uploaded video to detect human poses and classify activity.
-    Optimizations:
-    - Skip frames
-    - Resize frames
-    - Batch action prediction
-    Returns annotated video and predicted action.
+    Video is limited to 10 seconds. Returns annotated video and predicted action.
     """
     try:
         # Save uploaded video temporarily
@@ -37,53 +33,41 @@ def detect_pose_and_activity(video_file):
 
         fps = cap.get(cv2.CAP_PROP_FPS)
         if fps == 0:
-            fps = 30
+            fps = 30  # fallback
 
         total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
         max_frames = int(min(total_frames/fps, 10) * fps)  # limit 10s
 
         output_frames = []
-        pil_frames_for_model = []
+        action_predictions = []
 
-        frame_skip = 2  # process every 2nd frame
-        target_size = (224, 224)  # Resize for faster inference
-
-        with mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
-            frame_index = 0
-            while frame_index < max_frames:
+        # Process frames
+        with mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5) as pose:
+            for _ in range(max_frames):
                 ret, frame = cap.read()
                 if not ret:
                     break
 
-                # Resize frame for speed
-                frame_small = cv2.resize(frame, target_size)
-                image_rgb = cv2.cvtColor(frame_small, cv2.COLOR_BGR2RGB)
-
-                # Pose detection on full frame
-                results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+                # Pose detection
+                image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                results = pose.process(image_rgb)
                 if results.pose_landmarks:
                     mp.solutions.drawing_utils.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
 
                 output_frames.append(frame)
 
-                # Only process every Nth frame for action prediction
-                if frame_index % frame_skip == 0:
-                    pil_image = Image.fromarray(image_rgb)
-                    pil_frames_for_model.append(pil_image)
-
-                frame_index += 1
+                # Convert frame to PIL image for Hugging Face model
+                pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+                pred = action_model(pil_image)
+                action_predictions.append(pred[0]['label'])
 
         cap.release()
 
         if len(output_frames) == 0:
             return None, "Error: No frames to process."
 
-        # Batch prediction
-        preds = action_model(pil_frames_for_model)
-        action_labels = [pred['label'] for pred in preds]
-
         # Take the most frequent predicted action
-        final_action = max(set(action_labels), key=action_labels.count)
+        action_label = max(set(action_predictions), key=action_predictions.count)
 
         # Save annotated video
         output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
@@ -93,7 +77,7 @@ def detect_pose_and_activity(video_file):
             out.write(f)
         out.release()
 
-        return output_file, f"Predicted Action: {final_action}"
+        return output_file, f"Predicted Action: {action_label}"
 
     except Exception as e:
         return None, f"Runtime Error: {str(e)}"
@@ -103,8 +87,8 @@ iface = gr.Interface(
     fn=detect_pose_and_activity,
     inputs=gr.Video(label="Upload a Video (max 10s)"),
     outputs=[gr.Video(label="Pose Detection Output"), gr.Textbox(label="Detected Action")],
-    title="Human Pose & Activity Recognition (Optimized)",
-    description="Upload a short video (max 10s). The app detects human poses and predicts the activity quickly using frame skipping, resizing, and batch predictions."
+    title="Human Pose & Activity Recognition",
+    description="Upload a short video (max 10s). The app detects human poses and predicts the activity (e.g., dancing, cycling, running)."
 )
 
-iface.launch()
+iface.launch()
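
For reference, the hunk header `@@ -19,11 +19,7 @@ action_model = pipeline(` implies a preamble above the diffed region that imports the libraries used here and builds the classifier. Below is a minimal sketch of what those first lines might look like; the import list is inferred from identifiers in the diff (cv2, mp, mp_pose, Image, tempfile, gr, action_model), and the model checkpoint is a hypothetical placeholder, not necessarily the one this Space actually uses:

import tempfile

import cv2
import gradio as gr
import mediapipe as mp
from PIL import Image
from transformers import pipeline

# MediaPipe pose solution used inside detect_pose_and_activity
mp_pose = mp.solutions.pose

# Assumption: an image-classification pipeline; the real checkpoint name
# is not visible in this diff, so a generic one stands in here.
action_model = pipeline(
    "image-classification",
    model="microsoft/resnet-50",  # hypothetical placeholder checkpoint
)

With a preamble like this in place, the updated function runs one action_model call per frame and majority-votes the per-frame labels, so a direct call such as detect_pose_and_activity("clip.mp4") (hypothetical path; Gradio passes the uploaded video's file path) would return the annotated video path and a string like "Predicted Action: ...".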