James040 committed on
Commit
549107e
·
verified ·
1 Parent(s): 36f5403

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -41
app.py CHANGED
@@ -4,13 +4,14 @@ import gradio as gr
4
  import subprocess
5
  import urllib.request
6
  import os
 
7
 
8
- # 1. Use the Modern Tasks API
9
  import mediapipe as mp
10
  from mediapipe.tasks import python
11
  from mediapipe.tasks.python import vision
12
 
13
- # Auto-Download the Pose Model for the CPU
14
  MODEL_PATH = "pose_landmarker_lite.task"
15
  MODEL_URL = "https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_lite/float16/1/pose_landmarker_lite.task"
16
 
@@ -18,7 +19,6 @@ if not os.path.exists(MODEL_PATH):
18
  print("Downloading MediaPipe Pose Model...")
19
  urllib.request.urlretrieve(MODEL_URL, MODEL_PATH)
20
 
21
- # 2. Hardcode Skeleton Connections (Bypassing the broken drawing_utils)
22
  POSE_CONNECTIONS = [
23
  (0, 1), (1, 2), (2, 3), (3, 7), (0, 4), (4, 5), (5, 6), (6, 8), (9, 10),
24
  (11, 12), (11, 13), (13, 15), (15, 17), (15, 19), (15, 21), (17, 19),
@@ -27,12 +27,13 @@ POSE_CONNECTIONS = [
27
  (28, 30), (29, 31), (30, 32), (27, 31), (28, 32)
28
  ]
29
 
30
- def extract_pose(video_path):
31
  if video_path is None:
32
- return None
33
 
34
- output_path = "final_output.mp4"
35
  temp_video = "temp_silent.mp4"
 
36
 
37
  cap = cv2.VideoCapture(video_path)
38
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
@@ -42,13 +43,15 @@ def extract_pose(video_path):
42
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
43
  out = cv2.VideoWriter(temp_video, fourcc, fps, (width, height))
44
 
45
- # 3. Configure Tasks API for Video Processing
46
  base_options = python.BaseOptions(model_asset_path=MODEL_PATH)
47
  options = vision.PoseLandmarkerOptions(
48
  base_options=base_options,
49
  running_mode=vision.RunningMode.VIDEO
50
  )
51
 
 
 
 
52
  with vision.PoseLandmarker.create_from_options(options) as landmarker:
53
  frame_idx = 0
54
  while cap.isOpened():
@@ -56,64 +59,91 @@ def extract_pose(video_path):
56
  if not ret:
57
  break
58
 
59
- # Format frame for Tasks API
60
  rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
61
  mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
62
-
63
- # Strict timestamp required for video mode
64
  timestamp_ms = int((frame_idx / fps) * 1000)
65
 
66
- # Run Inference
67
  result = landmarker.detect_for_video(mp_image, timestamp_ms)
68
-
69
- # Pure Black Canvas
70
  canvas = np.zeros((height, width, 3), dtype=np.uint8)
71
 
72
- # 4. Draw Meaty Lines Manually
73
- if result.pose_landmarks:
74
- for pose in result.pose_landmarks:
75
- # Draw Thick Green Bones
76
- for connection in POSE_CONNECTIONS:
77
- start_idx, end_idx = connection
78
- start_pt = pose[start_idx]
79
- end_pt = pose[end_idx]
80
-
81
- start_px = (int(start_pt.x * width), int(start_pt.y * height))
82
- end_px = (int(end_pt.x * width), int(end_pt.y * height))
83
- cv2.line(canvas, start_px, end_px, (0, 255, 0), 10)
84
-
85
- # Draw Large White Joints
86
- for landmark in pose:
87
- px = (int(landmark.x * width), int(landmark.y * height))
88
- cv2.circle(canvas, px, 15, (255, 255, 255), -1)
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  out.write(canvas)
91
  frame_idx += 1
92
 
93
  cap.release()
94
  out.release()
95
 
 
 
 
 
96
  # Merge Audio Native FFmpeg
97
  try:
98
  command = [
99
  "ffmpeg", "-y", "-i", temp_video, "-i", video_path,
100
  "-c:v", "copy", "-c:a", "aac", "-map", "0:v:0", "-map", "1:a:0?",
101
- "-shortest", output_path
102
  ]
103
  subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
104
- return output_path
105
  except Exception as e:
106
  print("FFmpeg error:", e)
107
- return temp_video
 
 
 
108
 
109
- # UI Setup
110
- interface = gr.Interface(
111
- fn=extract_pose,
112
- inputs=gr.Video(label="Upload Dancing Clip (15-30s)"),
113
- outputs=gr.Video(label="Meaty Stickman Output"),
114
- title="Tasks API Pose Extractor",
115
- description="Uses modern MediaPipe Tasks to generate thick tracking lines for EbSynth."
116
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
  if __name__ == "__main__":
119
  interface.launch()
 
4
  import subprocess
5
  import urllib.request
6
  import os
7
+ import json
8
 
9
+ # 1. Modern Tasks API
10
  import mediapipe as mp
11
  from mediapipe.tasks import python
12
  from mediapipe.tasks.python import vision
13
 
14
+ # Auto-Download Model
15
  MODEL_PATH = "pose_landmarker_lite.task"
16
  MODEL_URL = "https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_lite/float16/1/pose_landmarker_lite.task"
17
 
 
19
  print("Downloading MediaPipe Pose Model...")
20
  urllib.request.urlretrieve(MODEL_URL, MODEL_PATH)
21
 
 
22
  POSE_CONNECTIONS = [
23
  (0, 1), (1, 2), (2, 3), (3, 7), (0, 4), (4, 5), (5, 6), (6, 8), (9, 10),
24
  (11, 12), (11, 13), (13, 15), (15, 17), (15, 19), (15, 21), (17, 19),
 
27
  (28, 30), (29, 31), (30, 32), (27, 31), (28, 32)
28
  ]
29
 
30
+ def extract_pose_and_data(video_path):
31
  if video_path is None:
32
+ return None, None, None
33
 
34
+ output_video_path = "final_output.mp4"
35
  temp_video = "temp_silent.mp4"
36
+ output_json_path = "pose_data.json"
37
 
38
  cap = cv2.VideoCapture(video_path)
39
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
 
43
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
44
  out = cv2.VideoWriter(temp_video, fourcc, fps, (width, height))
45
 
 
46
  base_options = python.BaseOptions(model_asset_path=MODEL_PATH)
47
  options = vision.PoseLandmarkerOptions(
48
  base_options=base_options,
49
  running_mode=vision.RunningMode.VIDEO
50
  )
51
 
52
+ # Storage for Blender Data
53
+ all_frames_data = []
54
+
55
  with vision.PoseLandmarker.create_from_options(options) as landmarker:
56
  frame_idx = 0
57
  while cap.isOpened():
 
59
  if not ret:
60
  break
61
 
 
62
  rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
63
  mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
 
 
64
  timestamp_ms = int((frame_idx / fps) * 1000)
65
 
 
66
  result = landmarker.detect_for_video(mp_image, timestamp_ms)
 
 
67
  canvas = np.zeros((height, width, 3), dtype=np.uint8)
68
 
69
+ frame_entry = {
70
+ "frame": frame_idx,
71
+ "timestamp_ms": timestamp_ms,
72
+ "landmarks": []
73
+ }
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
+ if result.pose_landmarks and result.pose_world_landmarks:
76
+ # 1. Extract 3D World Data for JSON (For Blender)
77
+ for landmark in result.pose_world_landmarks[0]:
78
+ frame_entry["landmarks"].append({
79
+ "x": landmark.x,
80
+ "y": landmark.y,
81
+ "z": landmark.z,
82
+ "visibility": landmark.visibility
83
+ })
84
+
85
+ # 2. Draw 2D Data for Video (For EbSynth)
86
+ pose = result.pose_landmarks[0]
87
+ for connection in POSE_CONNECTIONS:
88
+ start_idx, end_idx = connection
89
+ start_pt, end_pt = pose[start_idx], pose[end_idx]
90
+ start_px = (int(start_pt.x * width), int(start_pt.y * height))
91
+ end_px = (int(end_pt.x * width), int(end_pt.y * height))
92
+ cv2.line(canvas, start_px, end_px, (0, 255, 0), 10)
93
+
94
+ for landmark in pose:
95
+ px = (int(landmark.x * width), int(landmark.y * height))
96
+ cv2.circle(canvas, px, 15, (255, 255, 255), -1)
97
+
98
+ all_frames_data.append(frame_entry)
99
  out.write(canvas)
100
  frame_idx += 1
101
 
102
  cap.release()
103
  out.release()
104
 
105
+ # Save the JSON file
106
+ with open(output_json_path, 'w') as f:
107
+ json.dump(all_frames_data, f, indent=4)
108
+
109
  # Merge Audio Native FFmpeg
110
  try:
111
  command = [
112
  "ffmpeg", "-y", "-i", temp_video, "-i", video_path,
113
  "-c:v", "copy", "-c:a", "aac", "-map", "0:v:0", "-map", "1:a:0?",
114
+ "-shortest", output_video_path
115
  ]
116
  subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 
117
  except Exception as e:
118
  print("FFmpeg error:", e)
119
+ output_video_path = temp_video
120
+
121
+ # Return: Video File, JSON File (for download), JSON Dictionary (for UI Copying)
122
+ return output_video_path, output_json_path, all_frames_data
123
 
124
+ # Gradio UI Setup
125
+ with gr.Blocks(title="Pose & 3D Data Extractor") as interface:
126
+ gr.Markdown("# 🕺 Pose Video & 3D JSON Extractor")
127
+ gr.Markdown("Generates a thick stickman for EbSynth and extracts `pose_world_landmarks` (x, y, z) for Blender IK.")
128
+
129
+ with gr.Row():
130
+ with gr.Column():
131
+ video_input = gr.Video(label="Upload Dancing Clip (15-30s)")
132
+ submit_btn = gr.Button("Extract Pose & Data", variant="primary")
133
+
134
+ with gr.Column():
135
+ video_output = gr.Video(label="Meaty Stickman Output")
136
+ file_output = gr.File(label="Download 3D JSON Data")
137
+
138
+ with gr.Row():
139
+ # The gr.JSON component automatically includes a "Copy" button in the top right
140
+ json_output = gr.JSON(label="Raw JSON Data (Click top right to Copy)")
141
+
142
+ submit_btn.click(
143
+ fn=extract_pose_and_data,
144
+ inputs=video_input,
145
+ outputs=[video_output, file_output, json_output]
146
+ )
147
 
148
  if __name__ == "__main__":
149
  interface.launch()