# smashfix-v1 / src/preprocess_hybrid.py
# (Hugging Face page residue: uploaded by uncertainrods, branch v1-try-deploy, commit 0d0412d)
"""
Hybrid Feature Preprocessing Pipeline
======================================
Streaming video processor for extracting fused pose+CNN features from raw
badminton footage. Combines MediaPipe pose landmarks with MobileNetV2 visual
embeddings for hybrid classification.
Key Features:
- Dual-feature extraction: 3D pose (99D) + CNN visual (128D)
- Pose-guided ROI cropping for CNN input
- Raw landmark preservation for KSI evaluation
- Memory-efficient streaming processing
- Sliding window segmentation with stride
- Temporal smoothing via bounding box tracking
Processing Pipeline:
1. Load video and determine segment bounds
2. For each frame in segment:
a. Apply crop configuration
b. Extract 3D pose via MediaPipe
c. Compute pose-guided ROI bounding box
d. Extract CNN features via MobileNetV2
e. Fuse pose + CNN features
f. Store raw landmarks for KSI
3. Save windows with features and landmarks
4. Cleanup resources
Output Format:
.npz files with:
- 'features': (T, 99+CNN_DIM) fused pose+CNN features
- 'raw_landmarks': (T, 33, 3) normalized pose for KSI
- 'fps': Original video frame rate
Dependencies:
    External: cv2, numpy, tensorflow, yaml, tqdm, mediapipe
    Internal: features.HybridFeatureExtractor, utils (normalize_pose,
        should_skip_crop, get_segment_bounds, resolve_crop_config_for_video)
Configuration (params.yaml):
hybrid_pipeline:
data_path: Output directory for processed features
cnn_feature_dim: CNN embedding dimension (default: 128)
cnn_input_size: CNN input resolution (default: 224)
sequence_length: Frames per window
stride: Sliding window step size
crop_config: Frame cropping parameters
cnn_roi: Pose-guided ROI configuration
mediapipe: MediaPipe Pose configuration
Usage:
python preprocess_hybrid.py
Author: IPD Research Team
Version: 1.0.0
"""
import argparse
import gc
import os
import sys
import zlib
from collections import deque

import cv2
import mediapipe as mp
import numpy as np
import yaml
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tqdm import tqdm

from features import HybridFeatureExtractor
from utils import normalize_pose, should_skip_crop, get_segment_bounds, resolve_crop_config_for_video
def process_video_streaming(
    video_path,
    output_dir,
    extractor,
    seq_len,
    stride,
    crop_config,
    segment_rules=None,
    roi_cfg=None,
    visualize=False,
    max_windows=None,
):
    """Stream video -> fused features -> sliding windows saved to disk.

    Each saved ``{file_id}_win_{i}.npz`` contains:
        - 'features': (seq_len, 99 + CNN_DIM) fused pose+CNN features
        - 'raw_landmarks': (seq_len, 33, 3) raw pose landmarks for KSI
        - 'fps': original video frame rate

    Args:
        video_path: Path to the input video file.
        output_dir: Directory that receives the window .npz files.
        extractor: HybridFeatureExtractor providing the MediaPipe pose model
            (``extractor.pose``) and the CNN (``extractor.rgb_model``).
        seq_len: Frames per sliding window.
        stride: Step in frames between window starts.
        crop_config: Fractional crop dict with 'top'/'bottom'/'left'/'right'.
        segment_rules: Optional segment-bound configuration forwarded to
            get_segment_bounds.
        roi_cfg: Optional pose-guided ROI configuration for the CNN crop.
        visualize: If True, show an OpenCV debug overlay; pressing 'q'
            terminates the whole process.
        max_windows: If set, deterministically subsample to at most this
            many windows per video (anti-overfitting).
    """
    filename = os.path.basename(video_path)
    file_id = os.path.splitext(filename)[0]
    # Resume support: skip videos whose first window was already written.
    if os.path.exists(os.path.join(output_dir, f"{file_id}_win_0.npz")):
        return
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0  # fall back to 30 if metadata is missing/0
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
    start_frame, segment_frames = get_segment_bounds(
        video_path,
        fps,
        total_frames,
        default_seconds=1.75,
        segment_cfg=segment_rules,
    )
    cap.set(cv2.CAP_PROP_POS_FRAMES, int(start_frame))
    skip_crop = should_skip_crop(filename)
    # Fallback features for frames before the first successful pose detection.
    zeros_pose = np.zeros(99, dtype=np.float32)
    zeros_landmarks = np.zeros((33, 3), dtype=np.float32)
    last_pose = None
    last_landmarks = None
    last_box = None  # previous ROI box, reused for temporal smoothing
    # Visualization Setup
    mp_drawing = mp.solutions.drawing_utils
    mp_pose = mp.solutions.pose
    window_buffer = deque(maxlen=seq_len)
    landmarks_buffer = deque(maxlen=seq_len)
    collected_windows = []  # Collect all windows, then subsample
    frame_idx = 0
    try:
        while frame_idx < int(segment_frames):
            ret, frame = cap.read()
            if not ret:
                break
            frame_idx += 1
            if skip_crop:
                frame_cropped = frame
            else:
                h, w = frame.shape[:2]
                start_row = int(h * crop_config['top'])
                end_row = h - int(h * crop_config['bottom'])
                start_col = int(w * crop_config['left'])
                end_col = w - int(w * crop_config['right'])
                frame_cropped = frame[start_row:end_row, start_col:end_col]
            if frame_cropped.size == 0:
                continue  # degenerate crop; drop this frame entirely
            # MediaPipe expects RGB; OpenCV delivers BGR.
            res = extractor.pose.process(cv2.cvtColor(frame_cropped, cv2.COLOR_BGR2RGB))
            if res.pose_landmarks:
                lm = np.array([[l.x, l.y, l.z] for l in res.pose_landmarks.landmark], dtype=np.float32)
                pose_flat = normalize_pose(lm).astype(np.float32).flatten()
                last_pose = pose_flat
                last_landmarks = lm.copy()
            else:
                # No detection: carry the last observed pose forward (or zeros).
                pose_flat = last_pose if last_pose is not None else zeros_pose
                lm = last_landmarks if last_landmarks is not None else zeros_landmarks
            h2, w2 = frame_cropped.shape[:2]
            box = extractor._compute_pose_roi_box(
                res.pose_landmarks if hasattr(res, 'pose_landmarks') else None,
                w2,
                h2,
                roi_cfg,
                last_box=last_box,
            )
            last_box = box if box is not None else last_box
            roi_frame = extractor._crop_with_box(frame_cropped, box)
            img_size = getattr(extractor, 'cnn_input_size', 224)
            img = cv2.resize(roi_frame, (img_size, img_size))
            # --- VISUALIZATION (Mimics realtime_hybrid.py) ---
            if visualize:
                overlay = frame.copy()
                h, w = frame.shape[:2]
                # Draw ROI Box
                if box is not None and not skip_crop:
                    # Map cropped box to original definition
                    # Note: Preprocessing crop_config is applied relative to frame
                    top_off = int(h * crop_config['top']) if not skip_crop else 0
                    left_off = int(w * crop_config['left']) if not skip_crop else 0
                    x1, y1, x2, y2 = box
                    cv2.rectangle(
                        overlay,
                        (left_off + x1, top_off + y1),
                        (left_off + x2, top_off + y2),
                        (0, 255, 255),
                        2,
                    )
                # Draw Landmarks
                if res.pose_landmarks:
                    # Draw on crop view then paste back to overlay
                    viz_crop = frame_cropped.copy()
                    mp_drawing.draw_landmarks(
                        viz_crop,
                        res.pose_landmarks,
                        mp_pose.POSE_CONNECTIONS
                    )
                    # Paste back
                    start_row = int(h * crop_config['top']) if not skip_crop else 0
                    end_row = h - int(h * crop_config['bottom']) if not skip_crop else h
                    start_col = int(w * crop_config['left']) if not skip_crop else 0
                    end_col = w - int(w * crop_config['right']) if not skip_crop else w
                    # Ensure dimensions match (sometimes rounding errors occur)
                    viz_h, viz_w = viz_crop.shape[:2]
                    overlay_h, overlay_w = overlay[start_row:end_row, start_col:end_col].shape[:2]
                    if viz_h == overlay_h and viz_w == overlay_w:
                        overlay[start_row:end_row, start_col:end_col] = viz_crop
                cv2.imshow('Preprocessing Window', overlay)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    print("Exiting visualization...")
                    sys.exit(0)
            # BGR -> RGB, add batch dim, then MobileNetV2 preprocessing.
            img = preprocess_input(np.expand_dims(img[..., ::-1], axis=0))
            cnn_feat = extractor.rgb_model.predict(img, verbose=0)[0].astype(np.float32)
            fused = np.concatenate([pose_flat, cnn_feat], axis=0)
            window_buffer.append(fused)
            landmarks_buffer.append(lm)  # raw landmarks kept for KSI evaluation
            # Collect windows on fixed stride relative to the segment start
            if len(window_buffer) == seq_len and ((frame_idx - seq_len) % stride == 0):
                collected_windows.append((
                    np.asarray(window_buffer, dtype=np.float32),
                    np.asarray(landmarks_buffer, dtype=np.float32),
                    float(fps)
                ))
            # Release per-frame arrays eagerly to keep peak memory low.
            del frame
            del frame_cropped
            del img
            del fused
    finally:
        cap.release()
    # Subsample windows if max_windows is set (anti-overfitting)
    if max_windows is not None and len(collected_windows) > max_windows:
        # BUGFIX: builtin hash() on str is salted per process (PYTHONHASHSEED),
        # so the previous seed (hash(file_id)) was NOT reproducible across runs.
        # crc32 is a stable digest, making the subsample deterministic per video.
        seed = zlib.crc32(file_id.encode('utf-8')) % (2**31)
        rng = np.random.RandomState(seed)
        indices = rng.choice(len(collected_windows), max_windows, replace=False)
        indices.sort()  # preserve temporal order
        collected_windows = [collected_windows[i] for i in indices]
    # Save collected (possibly subsampled) windows
    for i, (feat, lm, fps_val) in enumerate(collected_windows):
        save_path = os.path.join(output_dir, f"{file_id}_win_{i}.npz")
        np.savez(save_path, features=feat, raw_landmarks=lm, fps=fps_val)
    del window_buffer
    del landmarks_buffer
    del collected_windows
    gc.collect()
def main():
    """Entry point: load params.yaml and preprocess every raw video.

    Iterates over class subdirectories of the raw data path, resolves a
    per-video crop configuration, and streams each video through
    process_video_streaming(). The extractor and any OpenCV windows are
    cleaned up in a finally block even if processing fails mid-run.
    """
    parser = argparse.ArgumentParser(description="Hybrid Preprocessing Pipeline")
    parser.add_argument("--visualize", action='store_true', help="Visualize preprocessing steps (landmarks, ROI)")
    args = parser.parse_args()
    with open("params.yaml") as f:
        params = yaml.safe_load(f)
    raw_dir = params['base']['raw_data_path']
    cfg = params['hybrid_pipeline']
    mp_cfg = params['mediapipe']  # NOTE: top-level key, not nested under hybrid_pipeline
    segment_rules = params.get('segment_rules', {})
    crop_overrides = params.get('crop_overrides', {})
    max_windows = segment_rules.get('max_windows_per_video')
    out_dir = cfg['data_path']
    os.makedirs(out_dir, exist_ok=True)
    if not os.path.exists(raw_dir):
        # Previously a silent return; tell the operator why nothing happened.
        print(f"[preprocess_hybrid] Raw data path not found: {raw_dir}", file=sys.stderr)
        return
    extractor = HybridFeatureExtractor(
        mp_cfg,
        cnn_dim=cfg['cnn_feature_dim'],
        cnn_input_size=cfg.get('cnn_input_size', 224),
        rsn_weights_path=cfg.get('rsn_pretrained_weights'),
    )
    try:
        # One subdirectory per class label under the raw data root.
        for cls in os.listdir(raw_dir):
            cls_in = os.path.join(raw_dir, cls)
            if not os.path.isdir(cls_in):
                continue
            cls_out = os.path.join(out_dir, cls)
            os.makedirs(cls_out, exist_ok=True)
            videos = [v for v in os.listdir(cls_in) if v.lower().endswith(('.mp4', '.avi', '.mov', '.webm'))]
            for vid in tqdm(videos, desc=f"Hybrid Prep {cls}"):
                video_path = os.path.join(cls_in, vid)
                # Per-video crop override falls back to the global crop_config.
                crop_cfg = resolve_crop_config_for_video(video_path, cfg['crop_config'], crop_overrides)
                process_video_streaming(
                    video_path,
                    cls_out,
                    extractor,
                    cfg['sequence_length'],
                    cfg['stride'],
                    crop_cfg,
                    segment_rules,
                    roi_cfg=cfg.get('cnn_roi'),
                    visualize=args.visualize,
                    max_windows=max_windows,
                )
    finally:
        if args.visualize:
            cv2.destroyAllWindows()
        try:
            extractor.pose.close()
        except Exception:
            pass  # best-effort cleanup; pose may already be closed
        del extractor
        gc.collect()
if __name__ == "__main__":
    main()