Spaces:

uncertainrods
/

smashfix-v1

Sleeping

App Files Files Community

smashfix-v1 / src /utils.py

uncertainrods

v1-try-deploy

0d0412d about 2 months ago

raw

history blame contribute delete

8 kB

	"""
	Utility Functions for Pose Processing
	======================================

	Core utility functions supporting video preprocessing and pose normalization
	across both the pose and hybrid pipelines. Handles geometric transformations,
	video segment extraction, and crop configuration resolution.

	Key Components:
	1. Pose Normalization (normalize_pose, normalize_sequence)
	- Person-centric coordinate system transformation
	- Three-step process: Centering → Alignment → Scaling
	- Hip-centered with spine-aligned Y-axis
	- Scale-invariant via spine length normalization

	2. Video Segment Extraction (get_segment_bounds)
	- Extracts relevant portions of shot videos
	- Supports tail-based, middle-based, or full extraction
	- Configurable via segment_rules in params.yaml

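	3. Crop Configuration (resolve_crop_config_for_video)
	- Per-video crop overrides based on the leading video number in the filename
	- Overrides are grouped by shot folder name and selected via inclusive start/end number ranges
	- Pre-cropped files with the "(N)" naming pattern are detected separately by should_skip_crop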

	4. File Pattern Detection (should_skip_crop, extract_video_number)
	- Detects pre-cropped files that should skip cropping
	- Extracts video numbers for per-video configuration

	Normalization Algorithm:
	1. Center pose at hip midpoint (joints 23, 24)
	2. Align Y-axis with spine direction (hip → shoulder center)
	3. Compute orthogonal X-axis from shoulder vector
	4. Z-axis via cross product for right-handed system
	5. Scale by spine length for size invariance

	Dependencies:
	External: numpy
	Standard library: re, os, copy

	Configuration (params.yaml):
	segment_rules: Video segment extraction settings
	crop_overrides: Per-video crop configuration

	Author: IPD Research Team
	Version: 1.0.0
	"""

	import numpy as np
	import re


	def normalize_pose(keypoints_3d):
	    """
	    Normalize a single 3D pose to a person-centric coordinate system.

	    The pose is processed in three steps: Centering -> Alignment -> Scaling.

	    Args:
	        keypoints_3d: Array-like of shape (33, 3). Rows 11/12 are read as the
	            left/right shoulder and rows 23/24 as the left/right hip.

	    Returns:
	        A (33, 3) array expressed in the new basis and divided by the spine
	        length (distance from the hip midpoint to the shoulder midpoint).
	        Two fallbacks apply:
	        - input whose shape is not (33, 3) is returned unchanged (as an array);
	        - if the spine length is below 1e-6, the centered but un-rotated and
	          un-scaled pose is returned, since no spine direction can be derived.
	    """
	    # Accept lists / nested sequences as well as ndarrays.
	    pose = np.asarray(keypoints_3d)
	    if pose.shape != (33, 3):
	        return pose

	    LEFT_SHOULDER, RIGHT_SHOULDER = 11, 12
	    LEFT_HIP, RIGHT_HIP = 23, 24

	    # 1. Centering: move the hip midpoint to the origin.
	    hip_center = (pose[LEFT_HIP] + pose[RIGHT_HIP]) / 2.0
	    centered = pose - hip_center

	    # 2. Alignment: the new Y axis points from the hips to the shoulder midpoint.
	    shoulder_center = (centered[LEFT_SHOULDER] + centered[RIGHT_SHOULDER]) / 2.0
	    spine_len = np.linalg.norm(shoulder_center)
	    if spine_len < 1e-6:
	        return centered

	    new_y = shoulder_center / spine_len

	    # The new X axis is the left->right shoulder vector with its component
	    # along the spine removed, so that it is orthogonal to new_y.
	    right_shoulder_vec = centered[RIGHT_SHOULDER] - centered[LEFT_SHOULDER]
	    new_x = right_shoulder_vec - np.dot(right_shoulder_vec, new_y) * new_y

	    if np.linalg.norm(new_x) < 1e-6:
	        # Shoulder line is (nearly) parallel to the spine: pick a world axis
	        # that is not parallel to new_y and build a perpendicular from it.
	        helper_axis = [0, 1, 0] if abs(new_y[0]) > 0.5 else [1, 0, 0]
	        new_x = np.cross(new_y, helper_axis)

	    new_x = new_x / np.linalg.norm(new_x)
	    new_z = np.cross(new_x, new_y)

	    # Rows of `rotation` are the new axes, so multiplying by its transpose
	    # expresses every joint in the new basis.
	    rotation = np.array([new_x, new_y, new_z])
	    aligned = np.dot(centered, rotation.T)

	    # 3. Scaling: divide by the spine length.
	    return aligned / spine_len

	def normalize_sequence(keypoints_sequence):
	    """Normalize every frame of a pose sequence.

	    Each element of ``keypoints_sequence`` is passed to ``normalize_pose`` and
	    the per-frame results are collected, in order, into one NumPy array.
	    """
	    normalized_frames = []
	    for pose in keypoints_sequence:
	        normalized_frames.append(normalize_pose(pose))
	    return np.array(normalized_frames)


	def should_skip_crop(video_path_or_name: str) -> bool:
	    """Return True if the filename matches the pattern "name (N).ext".

	    This matches filenames that contain a whitespace character followed by a
	    parenthesized integer index directly before the extension, e.g.
	    "backhand_drive (1).mp4". Only the final path component is inspected, so
	    either a full path or a bare filename may be passed.

	    Args:
	        video_path_or_name: Path or filename of the video.

	    Returns:
	        True when the basename ends with " (<digits>).<ext>", else False.
	    """
	    import os
	    import re

	    name = os.path.basename(video_path_or_name)
	    # The parentheses must be escaped to match literally; the extension is
	    # any run of non-dot characters up to the end of the name.
	    return re.search(r"\s\(\d+\)\.[^.]+$", name) is not None


	def extract_video_number(video_path_or_name: str) -> int \| None:
	"""Extract the leading numeric id from a filename/path.

	Examples:
	- "001.mp4" -> 1
	- "006_win_3.npz" -> 6
	- "12_some_name.mov" -> 12

	Returns None if no leading number exists.
	"""
	import os
	import re

	name = os.path.basename(video_path_or_name)
	m = re.match(r"^(\d+)", name)
	if not m:
	return None
	try:
	return int(m.group(1))
	except Exception:
	return None


	def resolve_crop_config_for_video(
	    video_path: str,
	    base_crop_config: dict,
	    crop_overrides: dict | None = None,
	) -> dict:
	    """Return crop_config with per-shot per-video overrides applied.

	    - The result starts as a deep copy of ``base_crop_config`` (or an all-zero
	      top/bottom/left/right config when that is None); the input is not mutated.
	    - The shot is matched by checking the path components of ``video_path``
	      against the keys of ``crop_overrides``; the first key found is used.
	    - The video is matched by the leading number in its filename, compared
	      against each rule's inclusive ``start``/``end`` range.
	    - Every key of a matching rule other than 'start' and 'end' is copied
	      into the result. If multiple ranges match, the last matching range wins.
	    - Rules whose 'start'/'end' are missing or not integer-like are skipped.

	    Args:
	        video_path: Path of the video; its directory names select the shot.
	        base_crop_config: Default crop configuration, or None.
	        crop_overrides: Mapping of shot name -> list of rule dicts, or None.

	    Returns:
	        The effective crop configuration for this video.
	    """
	    import copy
	    import os

	    if base_crop_config is not None:
	        effective = copy.deepcopy(base_crop_config)
	    else:
	        effective = {
	            'top': 0.0,
	            'bottom': 0.0,
	            'left': 0.0,
	            'right': 0.0,
	        }

	    if not crop_overrides:
	        return effective

	    video_num = extract_video_number(video_path)
	    if video_num is None:
	        return effective

	    comps = set(os.path.normpath(video_path).split(os.path.sep))
	    matched_shot = next((shot for shot in crop_overrides if shot in comps), None)
	    if not matched_shot:
	        return effective

	    for rule in crop_overrides.get(matched_shot) or []:
	        try:
	            start = int(rule.get('start'))
	            end = int(rule.get('end'))
	        except (AttributeError, TypeError, ValueError, OverflowError):
	            # Malformed rule (not a mapping, or bounds missing/non-numeric).
	            continue

	        if start <= video_num <= end:
	            for key, value in rule.items():
	                if key not in ('start', 'end'):
	                    effective[key] = value

	    return effective


	def get_tail_seconds_for_video(video_path: str, default: float = 1.75) -> float:
	    """Pick the tail window length (in seconds) for a video path.

	    Videos located under a ``forehand_lift`` or ``forehand_clear`` path
	    component get a 2.0 second window; every other video gets ``default``.
	    The path is normalized and split into components first, so both
	    absolute and relative paths are handled.
	    """
	    import os

	    long_tail_shots = {'forehand_lift', 'forehand_clear'}
	    path_parts = os.path.normpath(video_path).split(os.path.sep)
	    if long_tail_shots.isdisjoint(path_parts):
	        return default
	    return 2.0


	def get_segment_bounds(
	video_path: str,
	fps: float,
	total_frames: int,
	default_seconds: float = 1.75,
	segment_cfg: dict \| None = None,
	):
	"""Return (start_frame, frame_count) for the segment to process for a given video.

	Config-driven rules (segment_cfg):
	- default_seconds: fallback window length (tail).
	- tail_seconds: window length for tail_shots.
	- tail_shots: list of shot folder names to use tail_seconds.
	- middle_shots: mapping of shot folder name -> seconds for middle window.

	If no config is provided, uses hardcoded defaults (tail, 1.75s).
	"""
	import os

	if segment_cfg is None:
	segment_cfg = {}

	default_seconds = segment_cfg.get('default_seconds', default_seconds)
	tail_seconds = segment_cfg.get('tail_seconds', default_seconds)
	tail_shots = set(segment_cfg.get('tail_shots', []))
	middle_shots = segment_cfg.get('middle_shots', {})

	p = os.path.normpath(video_path)
	comps = p.split(os.path.sep)

	# Middle-shot rule
	for shot, secs in middle_shots.items():
	if shot in comps:
	frames = int(float(secs) * fps)
	center = total_frames // 2
	start = max(0, center - frames // 2)
	return start, frames

	# Tail-shot rule
	if any(shot in comps for shot in tail_shots):
	frames = int(float(tail_seconds) * fps)
	start = max(0, total_frames - frames)
	return start, frames

	# Default tail
	frames = int(float(default_seconds) * fps)
	start = max(0, total_frames - frames)
	return start, frames