"""
Pose Feature Preprocessing Pipeline
====================================
Streaming video processor for extracting normalized 3D pose features from
raw badminton video footage. Designed for memory efficiency with O(1) memory
relative to video length.
Key Features:
- Frame-by-frame streaming extraction (no full video loading)
- MediaPipe Pose for 3D landmark detection
- Geometric normalization (hip-centered, spine-aligned)
- Sliding window segmentation with configurable stride
- Incremental processing (skips already-processed videos)
- Segment-based extraction (tail/middle of shot videos)
- Per-video crop configuration support
Processing Pipeline:
1. Load video and determine segment bounds
2. For each frame in segment:
a. Apply crop if not pre-cropped file
b. Extract 3D pose via MediaPipe
c. Normalize to person-centric coordinates
d. Add to rolling window buffer
3. Save windows to disk on stride boundaries
4. Cleanup resources (explicit garbage collection)
Output Format:
.npz files with:
- 'features': (T, 99) normalized pose features
- 'fps': Original video frame rate
Memory Management:
- Rolling deque buffer (maxlen=sequence_length)
- Immediate frame cleanup after processing
- Periodic garbage collection every 10 videos
Dependencies:
External: cv2, numpy, mediapipe, yaml, tqdm
Internal: utils.normalize_pose, utils.get_segment_bounds
Configuration (params.yaml):
pose_pipeline:
data_path: Output directory for processed features
sequence_length: Frames per window
stride: Sliding window step size
crop_config: Frame cropping parameters
mediapipe: MediaPipe Pose configuration
Usage:
python preprocess_pose.py
Author: IPD Research Team
Version: 1.0.0
"""
import os
import yaml
import cv2
import numpy as np
import mediapipe as mp
import gc
from tqdm import tqdm
from collections import deque
from utils import normalize_pose, should_skip_crop, get_segment_bounds, resolve_crop_config_for_video
def get_pose_model(mp_config):
    """Build a MediaPipe Pose estimator configured for streaming video.

    Args:
        mp_config: Dict providing 'model_complexity',
            'min_detection_confidence' and 'min_tracking_confidence'.

    Returns:
        A configured ``mp.solutions.pose.Pose`` instance.
    """
    settings = {
        'static_image_mode': False,  # video mode: track landmarks across frames
        'model_complexity': mp_config['model_complexity'],
        'min_detection_confidence': mp_config['min_detection_confidence'],
        'min_tracking_confidence': mp_config['min_tracking_confidence'],
    }
    return mp.solutions.pose.Pose(**settings)
def process_video_streaming(video_path, output_dir, crop_config, mp_config, seq_len, stride, segment_rules=None):
    """
    Process one video frame-by-frame, saving sliding-window pose features
    to disk as soon as each window completes.

    Uses O(1) memory relative to video length: only a rolling deque of
    ``seq_len`` pose vectors is held at any time.

    Args:
        video_path: Path to the input video file.
        output_dir: Directory receiving ``<file_id>_win_<n>.npz`` files.
        crop_config: Dict with 'top'/'bottom'/'left'/'right' crop fractions.
        mp_config: MediaPipe Pose configuration dict.
        seq_len: Number of frames per saved window.
        stride: Step (in frames) between consecutive saved windows.
        segment_rules: Optional segment configuration forwarded to
            ``get_segment_bounds``.

    Side effects:
        Writes .npz files with 'features' (seq_len, 99) and 'fps'.
        Returns immediately if the video's first window already exists.
    """
    filename = os.path.basename(video_path)
    file_id = os.path.splitext(filename)[0]

    # Incremental processing: window 0 on disk marks this video as done.
    if os.path.exists(os.path.join(output_dir, f"{file_id}_win_0.npz")):
        return

    cap = cv2.VideoCapture(video_path)
    # FIX: bail out on unreadable/missing videos instead of iterating a
    # dead capture handle.
    if not cap.isOpened():
        cap.release()
        return

    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0  # guard against 0/None fps
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    start_frame, tail_frames = get_segment_bounds(
        video_path, fps, total_frames, default_seconds=1.75, segment_cfg=segment_rules
    )
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

    skip_crop = should_skip_crop(filename)
    window_buffer = deque(maxlen=seq_len)
    zeros_pose = np.zeros(99, dtype=np.float32)
    last_pose = None
    pose = get_pose_model(mp_config)
    frame_idx = 0
    saved_count = 0

    def _append_and_maybe_save(pose_vec):
        # Append one pose vector; flush the window when it is full and the
        # current frame lands on a stride boundary relative to start_frame.
        # (Extracted so the flush rule is not duplicated between the
        # empty-crop branch and the main path.)
        nonlocal saved_count
        window_buffer.append(pose_vec)
        if len(window_buffer) == seq_len and ((frame_idx - (seq_len - 1)) % stride == 0):
            save_path = os.path.join(output_dir, f"{file_id}_win_{saved_count}.npz")
            np.savez(save_path, features=np.array(window_buffer), fps=fps)
            saved_count += 1

    try:
        while frame_idx < int(tail_frames):
            ret, frame = cap.read()
            if not ret:
                break

            if skip_crop:
                frame_cropped = frame
            else:
                h, w = frame.shape[:2]
                frame_cropped = frame[
                    int(h * crop_config['top']):h - int(h * crop_config['bottom']),
                    int(w * crop_config['left']):w - int(w * crop_config['right'])
                ]

            if frame_cropped.size == 0:
                # Degenerate crop: carry the last good pose forward (or zeros).
                _append_and_maybe_save(last_pose if last_pose is not None else zeros_pose)
                frame_idx += 1
                del frame
                del frame_cropped
                continue

            image_rgb = cv2.cvtColor(frame_cropped, cv2.COLOR_BGR2RGB)
            res = pose.process(image_rgb)
            # Drop per-frame buffers immediately to keep peak memory flat.
            del frame
            del frame_cropped
            del image_rgb

            if res.pose_world_landmarks:
                lm = np.array(
                    [[l.x, l.y, l.z] for l in res.pose_world_landmarks.landmark],
                    dtype=np.float32,
                )
                pose_vec = normalize_pose(lm).flatten().astype(np.float32)
                last_pose = pose_vec
            else:
                # Detection gap: reuse the last good pose, else zeros.
                pose_vec = last_pose if last_pose is not None else zeros_pose

            _append_and_maybe_save(pose_vec)
            frame_idx += 1
    finally:
        # Ensure resources are freed even if processing raised.
        cap.release()
        pose.close()
        del pose
        del window_buffer
        gc.collect()
def main():
    """Entry point: walk the raw data tree and extract pose features.

    Loads params.yaml for pipeline, MediaPipe, segment and crop settings,
    then processes every video under each class directory, mirroring the
    class layout into the output directory.
    """
    with open("params.yaml") as f:
        params = yaml.safe_load(f)

    cfg = params['pose_pipeline']
    mp_cfg = params['mediapipe']
    segment_rules = params.get('segment_rules', {})
    crop_overrides = params.get('crop_overrides', {})

    raw_dir = params['base']['raw_data_path']
    out_dir = cfg['data_path']
    os.makedirs(out_dir, exist_ok=True)

    # Nothing to do if the raw data tree is absent.
    if not os.path.exists(raw_dir):
        return

    for cls in os.listdir(raw_dir):
        cls_in = os.path.join(raw_dir, cls)
        if not os.path.isdir(cls_in):
            continue
        cls_out = os.path.join(out_dir, cls)
        os.makedirs(cls_out, exist_ok=True)

        videos = [name for name in os.listdir(cls_in)
                  if name.endswith(('.mp4', '.avi', '.mov'))]
        for idx, vid in enumerate(tqdm(videos, desc=f"Pose Prep {cls}")):
            video_path = os.path.join(cls_in, vid)
            crop_cfg = resolve_crop_config_for_video(
                video_path, cfg['crop_config'], crop_overrides)
            process_video_streaming(
                video_path,
                cls_out,
                crop_cfg,
                mp_cfg,
                cfg['sequence_length'],
                cfg['stride'],
                segment_rules,
            )
            # Aggressive garbage collection every 10 videos to prevent
            # memory creep over long runs.
            if idx % 10 == 0:
                gc.collect()
# Script entry point: run the full pose preprocessing pipeline.
if __name__ == "__main__":
    main()