# Captured page header -- Spaces: YoungjaeDev/fall-detection-demo
# (status: Sleeping; file size: 26,955 bytes; revision: 8133f1d)

#!/usr/bin/env python3
"""
Skeleton Data Augmentation for ST-GCN Fall Detection

This module provides augmentation strategies for skeleton sequence data to improve
model generalization and robustness. All augmentations preserve the spatial-temporal
structure required by ST-GCN while introducing controlled variations.

Input Format: (C, T, V, M) where
    C = 3 channels (x, y, confidence)
    T = 60 frames (temporal window)
    V = 17 keypoints (COCO skeleton)
    M = 1 person (max persons tracked)

Augmentation Strategies:
1. Horizontal Flip: Mirror skeleton across vertical axis with keypoint swapping
2. Gaussian Noise: Add random noise to x,y coordinates (preserves confidence)
3. Temporal Crop: Random crop + resize to simulate variable fall speeds

Reference: Issue #34 - ST-GCN Training Dataset Creation
"""

import numpy as np
from typing import Tuple, Optional


# Mirror-image joint pairs of the COCO 17-keypoint layout, used by the
# horizontal flip. Each entry is (left_index, right_index). Index 0 (nose) has
# no counterpart; every following odd index is a left-side joint whose
# right-side twin sits at the next index:
#
#    1 <->  2   eyes
#    3 <->  4   ears
#    5 <->  6   shoulders
#    7 <->  8   elbows
#    9 <-> 10   wrists
#   11 <-> 12   hips
#   13 <-> 14   knees
#   15 <-> 16   ankles
COCO_LEFT_RIGHT_PAIRS = [(left, left + 1) for left in range(1, 17, 2)]


def augment_skeleton(data: np.ndarray, prob: float = 0.5) -> np.ndarray:
    """
    Randomly augment one skeleton sequence.

    Three transforms are considered in a fixed order. Each one gets its own
    uniform draw and is applied only when that draw falls below `prob`, so any
    combination (including none, or all three) can occur:

    1. Horizontal flip:
        x' = -x, then every (left, right) keypoint pair is exchanged.

    2. Gaussian noise:
        x' = x + N(0, sigma^2), y' = y + N(0, sigma^2) with sigma = 0.01;
        the confidence channel is left alone.

    3. Temporal crop & resize:
        a window covering 80-100% of the frames is cut out at a random
        position and linearly resampled back to T frames.

    The caller's array is never modified; all work happens on a copy.

    Args:
        data: Skeleton data with shape (C, T, V, M) where
            C = 3 (x, y, confidence)
            T = number of frames
            V = 17 (COCO keypoints)
            M = 1 (number of persons)
        prob: Probability of applying each individual augmentation (default: 0.5)

    Returns:
        augmented_data: New array with the same shape (C, T, V, M)

    Raises:
        AssertionError: If C != 3, V != 17 or M != 1.

    Example:
        >>> data = np.random.rand(3, 60, 17, 1)
        >>> augmented = augment_skeleton(data, prob=0.5)
        >>> augmented.shape
        (3, 60, 17, 1)
    """
    num_channels, _num_frames, num_joints, num_persons = data.shape
    assert num_channels == 3, f"Expected 3 channels (x, y, conf), got {num_channels}"
    assert num_joints == 17, f"Expected 17 COCO keypoints, got {num_joints}"
    assert num_persons == 1, f"Expected max 1 person, got {num_persons}"

    # Private working copy so the input stays untouched even if nothing fires.
    result = data.copy()

    # Stage 1: mirror left/right.
    flip_selected = np.random.rand() < prob
    if flip_selected:
        result = _horizontal_flip(result)

    # Stage 2: jitter x,y with Gaussian noise. The coin for this stage is drawn
    # only after stage 1 has finished, keeping the random stream order fixed.
    noise_selected = np.random.rand() < prob
    if noise_selected:
        result = _add_gaussian_noise(result)

    # Stage 3: crop a temporal window and stretch it back to full length.
    crop_selected = np.random.rand() < prob
    if crop_selected:
        result = _temporal_crop_resize(result)

    return result


def _horizontal_flip(data: np.ndarray) -> np.ndarray:
    """
    Mirror a skeleton sequence left-to-right.

    Two things happen, in this order:
        1. x' = -x (channel 0 is negated; y and confidence keep their values)
        2. for every (left_idx, right_idx) in COCO_LEFT_RIGHT_PAIRS the two
           joints trade places across all channels, so that e.g. the slot
           labelled "left shoulder" again holds the joint on the mirrored
           body's left side.

    Negating x mirrors about x = 0, so the input is expected to be centered
    (normalized) coordinates rather than raw pixel positions.

    Args:
        data: Skeleton data (C, T, V, M)

    Returns:
        flipped_data: New array (C, T, V, M); the input is not modified
    """
    mirrored = data.copy()

    # Reflect about the vertical axis: only the x channel changes sign.
    mirrored[0] = -mirrored[0]

    # Exchange each left/right joint pair for every channel at once. The
    # right-hand side is materialised as a copy by fancy indexing before the
    # assignment runs, so no explicit temporary is required.
    for left_idx, right_idx in COCO_LEFT_RIGHT_PAIRS:
        mirrored[:, :, [left_idx, right_idx], :] = mirrored[:, :, [right_idx, left_idx], :]

    return mirrored


def _add_gaussian_noise(data: np.ndarray, std: float = 0.01) -> np.ndarray:
    """
    Add Gaussian noise to x,y coordinates (preserves confidence channel).

    Mathematical Formulation:
        x' = x + N(0, sigma^2)
        y' = y + N(0, sigma^2)
        conf' = conf (unchanged)
        where sigma = 0.01 (default)

    The noise magnitude is calibrated for normalized coordinates in range [-0.5, 0.5].
    With std=0.01, 99.7% of noise values fall within [-0.03, 0.03] (3-sigma rule).

    Args:
        data: Skeleton data (C, T, V, M)
        std: Standard deviation of Gaussian noise (default: 0.01)

    Returns:
        noisy_data: Data with Gaussian noise added to x,y coordinates
    """
    C, T, V, M = data.shape
    noisy_data = data.copy()

    # Generate Gaussian noise for x,y channels only (not confidence)
    noise_shape = (2, T, V, M)  # Only x,y channels
    noise = np.random.normal(0, std, noise_shape).astype(data.dtype)

    # Add noise to x,y channels (0, 1), leave confidence channel (2) unchanged
    noisy_data[:2] += noise

    return noisy_data


def _temporal_crop_resize(data: np.ndarray, crop_ratio_range: Tuple[float, float] = (0.8, 1.0)) -> np.ndarray:
    """
    Randomly crop temporal sequence and resize back to original length.

    This augmentation simulates variable fall speeds by compressing or expanding
    the temporal dimension. A crop ratio of 0.8 means the fall happens 20% faster,
    while 1.0 means no temporal change.

    Mathematical Formulation:
        T_crop ~ Uniform(crop_min * T, crop_max * T)
        start ~ Uniform(0, T - T_crop)
        cropped = data[:, start:start+T_crop, :, :]
        resized = interpolate(cropped, T) using linear interpolation

    Args:
        data: Skeleton data (C, T, V, M)
        crop_ratio_range: (min_ratio, max_ratio) for crop length (default: (0.8, 1.0))

    Returns:
        resized_data: Temporally augmented data with original shape (C, T, V, M)
    """
    C, T, V, M = data.shape
    min_ratio, max_ratio = crop_ratio_range

    # Sample random crop ratio
    crop_ratio = np.random.uniform(min_ratio, max_ratio)
    crop_length = int(T * crop_ratio)
    crop_length = max(1, crop_length)  # Ensure at least 1 frame

    # Sample random start position
    max_start = max(0, T - crop_length)
    start_frame = np.random.randint(0, max_start + 1) if max_start > 0 else 0

    # Extract cropped window
    cropped = data[:, start_frame:start_frame + crop_length, :, :]

    # Resize back to original temporal length using linear interpolation
    resized_data = _temporal_interpolate(cropped, T)

    return resized_data


def _temporal_interpolate(data: np.ndarray, target_length: int) -> np.ndarray:
    """
    Interpolate temporal dimension to target length using linear interpolation.

    This function performs 1D linear interpolation along the temporal axis (axis=1)
    for each channel, keypoint, and person independently.

    Args:
        data: Skeleton data (C, T, V, M)
        target_length: Target number of frames

    Returns:
        interpolated_data: Data with temporal dimension resized to target_length
    """
    C, T_src, V, M = data.shape

    if T_src == target_length:
        return data

    # Create target time indices
    src_indices = np.linspace(0, T_src - 1, T_src)
    target_indices = np.linspace(0, T_src - 1, target_length)

    # Interpolate each channel, keypoint, person combination
    interpolated_data = np.zeros((C, target_length, V, M), dtype=data.dtype)

    for c in range(C):
        for v in range(V):
            for m in range(M):
                interpolated_data[c, :, v, m] = np.interp(
                    target_indices,
                    src_indices,
                    data[c, :, v, m]
                )

    return interpolated_data


def _normalize_by_hip_center(data: np.ndarray) -> np.ndarray:
    """
    Normalize skeleton by hip center position and skeleton size (ST-GCN standard).

    This is the recommended normalization method for skeleton-based action recognition,
    following the ST-GCN paper and NTU RGB+D dataset preprocessing.

    Algorithm:
    ----------
    1. Calculate hip center from left_hip (11) and right_hip (12)
    2. If hips have low confidence (<0.3), fallback to shoulder center
    3. Center all keypoints by subtracting hip center
    4. Calculate skeleton size as average shoulder-to-hip distance
    5. Scale all coordinates by skeleton size

    COCO Keypoints Used:
    - 5: left_shoulder
    - 6: right_shoulder
    - 11: left_hip
    - 12: right_hip

    Args:
        data: Skeleton data (C, T, V, M) with C=3 (x, y, conf)

    Returns:
        normalized_data: (C, T, V, M) centered at hip, scaled by skeleton size
            - x,y channels: relative to hip center, scaled by skeleton size
            - conf channel: unchanged

    Example:
        >>> data = np.random.rand(3, 60, 17, 1) * [3840, 2160, 1]
        >>> normalized = _normalize_by_hip_center(data)
        >>> # Hip center is now at (0, 0)
        >>> hip_center_x = (normalized[0, :, 11, :] + normalized[0, :, 12, :]) / 2
        >>> np.allclose(hip_center_x, 0.0, atol=1e-6)
        True
    """
    C, T, V, M = data.shape
    normalized_data = data.copy()

    # Extract hip keypoints (COCO: 11=left_hip, 12=right_hip)
    left_hip_xy = data[:2, :, 11:12, :]    # (2, T, 1, M)
    right_hip_xy = data[:2, :, 12:13, :]   # (2, T, 1, M)
    left_hip_conf = data[2:3, :, 11:12, :] # (1, T, 1, M)
    right_hip_conf = data[2:3, :, 12:13, :]# (1, T, 1, M)

    # Calculate average hip confidence across all frames
    left_hip_conf_mean = np.mean(left_hip_conf)
    right_hip_conf_mean = np.mean(right_hip_conf)

    # Determine center point (hip or shoulder fallback)
    if left_hip_conf_mean >= 0.3 and right_hip_conf_mean >= 0.3:
        # Normal case: Use hip center
        center_point = (left_hip_xy + right_hip_xy) / 2.0  # (2, T, 1, M)

        # Calculate skeleton size from shoulder-to-hip distance
        left_shoulder_xy = data[:2, :, 5:6, :]  # (2, T, 1, M)
        right_shoulder_xy = data[:2, :, 6:7, :] # (2, T, 1, M)

        # Left torso distance: ||left_shoulder - left_hip||
        left_torso = left_shoulder_xy - left_hip_xy  # (2, T, 1, M)
        left_torso_dist = np.sqrt(np.sum(left_torso ** 2, axis=0))  # (T, 1, M)

        # Right torso distance: ||right_shoulder - right_hip||
        right_torso = right_shoulder_xy - right_hip_xy  # (2, T, 1, M)
        right_torso_dist = np.sqrt(np.sum(right_torso ** 2, axis=0))  # (T, 1, M)

        # Average skeleton size across frames and left/right
        skeleton_size = np.mean([left_torso_dist, right_torso_dist])  # scalar

    else:
        # Fallback: Use shoulder center if hips not detected
        left_shoulder_xy = data[:2, :, 5:6, :]
        right_shoulder_xy = data[:2, :, 6:7, :]
        center_point = (left_shoulder_xy + right_shoulder_xy) / 2.0  # (2, T, 1, M)

        # Use shoulder width as skeleton size estimate
        shoulder_vector = right_shoulder_xy - left_shoulder_xy  # (2, T, 1, M)
        shoulder_width = np.sqrt(np.sum(shoulder_vector ** 2, axis=0))  # (T, 1, M)
        skeleton_size = np.mean(shoulder_width) * 2.0  # Approximate torso height

    # Prevent division by zero
    skeleton_size = max(skeleton_size, 1e-6)

    # Normalize x,y channels: center and scale
    normalized_data[:2] = (normalized_data[:2] - center_point) / skeleton_size

    # Confidence channel unchanged
    # normalized_data[2] remains as is

    return normalized_data


def _normalize_by_image_center(
    data: np.ndarray,
    img_width: int = 3840,
    img_height: int = 2160
) -> np.ndarray:
    """
    Legacy normalization by image center (for comparison only).

    This method is NOT recommended for ST-GCN training as it:
    - Includes absolute position information
    - Varies with camera angle
    - Does not normalize body size

    Use this only for comparing with old implementations or specific use cases
    where absolute position in frame matters.

    Args:
        data: Skeleton data (C, T, V, M)
        img_width: Image width in pixels (default: 3840 for AI Hub 4K)
        img_height: Image height in pixels (default: 2160 for AI Hub 4K)

    Returns:
        normalized_data: (C, T, V, M) with x,y in [-0.5, 0.5]
    """
    C, T, V, M = data.shape
    normalized_data = data.copy()

    # Normalize x-coordinate (channel 0): [0, img_width] -> [-0.5, 0.5]
    normalized_data[0] = (normalized_data[0] / img_width) - 0.5

    # Normalize y-coordinate (channel 1): [0, img_height] -> [-0.5, 0.5]
    normalized_data[1] = (normalized_data[1] / img_height) - 0.5

    # Confidence channel (2) remains unchanged in [0, 1]

    return normalized_data


def normalize_skeleton(
    data: np.ndarray,
    method: str = 'hip_center',
    img_width: int = 3840,
    img_height: int = 2160
) -> np.ndarray:
    """
    Normalize skeleton coordinates; public entry point for both strategies.

    Methods:
    --------
    1. 'hip_center' (default, ST-GCN standard):
       - Center: midpoint of left_hip and right_hip (COCO keypoints 11, 12)
       - Scale: skeleton size (mean shoulder-to-hip distance)
       - Fallback: shoulder center when the hips are not reliably detected
       - Reference: ST-GCN (Yan et al., AAAI 2018), NTU RGB+D normalization

    2. 'image_center' (legacy, not recommended):
       - Center: image center
       - Scale: image dimensions
       - Intended only for comparison with older implementations

    Mathematical Formulations (hip_center):
    ----------------------------------------
        hip_center    = (left_hip + right_hip) / 2
        skeleton_size = mean(||shoulder - hip||) over left and right
        x'' = (x - hip_center_x) / skeleton_size
        y'' = (y - hip_center_y) / skeleton_size

    Why hip_center is preferred:
    - It removes the person's absolute position in the frame
    - It compensates for body size (tall/short people become comparable)
    - The model sees relative pose only, which helps across different camera
      views such as the 8-camera AI Hub setup

    Args:
        data: Skeleton data with shape (C, T, V, M) where
            C = 3 (x in pixels, y in pixels, confidence)
            T = number of frames
            V = 17 (COCO keypoints)
            M = number of persons
        method: 'hip_center' (default) or 'image_center'
        img_width: Image width, used by 'image_center' only (default: 3840 for AI Hub 4K)
        img_height: Image height, used by 'image_center' only (default: 2160 for AI Hub 4K)

    Returns:
        normalized_data: Normalized skeleton data with shape (C, T, V, M)
            For hip_center: coordinates relative to the hip, in skeleton-size units
            For image_center: x,y in [-0.5, 0.5], confidence unchanged

    Raises:
        AssertionError: If C != 3 or V != 17.
        ValueError: If `method` is neither 'hip_center' nor 'image_center'.

    Example:
        >>> scale = np.array([3840, 2160, 1]).reshape(3, 1, 1, 1)
        >>> data = np.random.rand(3, 60, 17, 1) * scale
        >>> normalized = normalize_skeleton(data, method='hip_center')
        >>> normalized.shape
        (3, 60, 17, 1)
        >>> legacy = normalize_skeleton(data, method='image_center')
        >>> bool(legacy[0].min() >= -0.5 and legacy[0].max() <= 0.5)
        True
    """
    num_channels, _num_frames, num_joints, _num_persons = data.shape
    assert num_channels == 3, f"Expected 3 channels (x, y, conf), got {num_channels}"
    assert num_joints == 17, f"Expected 17 COCO keypoints, got {num_joints}"

    # Reject unsupported strategies up front, then dispatch.
    if method not in ('hip_center', 'image_center'):
        raise ValueError(
            f"Unknown normalization method: '{method}'. "
            f"Use 'hip_center' (ST-GCN standard) or 'image_center' (legacy)."
        )

    if method == 'image_center':
        return _normalize_by_image_center(data, img_width, img_height)

    return _normalize_by_hip_center(data)


def denormalize_skeleton(
    data: np.ndarray,
    method: str = 'hip_center',
    hip_center: Optional[np.ndarray] = None,
    skeleton_size: Optional[float] = None,
    img_width: int = 3840,
    img_height: int = 2160
) -> np.ndarray:
    """
    Map normalized skeleton coordinates back to the original space.

    This is mainly useful for visualization. Typical ST-GCN training never
    needs it: the network consumes normalized coordinates and predicts class
    labels, not coordinates.

    Methods:
    --------
    1. 'hip_center': needs the `hip_center` and `skeleton_size` that were in
       effect when the data was normalized. They cannot be recovered from the
       normalized data, so the caller has to supply them.
    2. 'image_center': only needs `img_width` and `img_height`.

    Args:
        data: Normalized skeleton data (C, T, V, M)
        method: Denormalization method - 'hip_center' or 'image_center'
        hip_center: Original hip center position (2, T, 1, M) - required for hip_center method
        skeleton_size: Original skeleton size (scalar) - required for hip_center method
        img_width: Image width for image_center method (default: 3840)
        img_height: Image height for image_center method (default: 2160)

    Returns:
        denormalized_data: Skeleton data in original coordinate space

    Raises:
        AssertionError: If C != 3.
        ValueError: If `method` is unknown, or if 'hip_center' is requested
            without both `hip_center` and `skeleton_size`.

    Example:
        >>> scale = np.array([3840, 2160, 1]).reshape(3, 1, 1, 1)
        >>> data_original = np.random.rand(3, 60, 17, 1) * scale
        >>> normalized = normalize_skeleton(data_original, method='image_center')
        >>> denormalized = denormalize_skeleton(normalized, method='image_center')
        >>> bool(np.allclose(data_original[:2], denormalized[:2], atol=1.0))  # Within 1 pixel
        True
    """
    num_channels, _num_frames, _num_joints, _num_persons = data.shape
    assert num_channels == 3, f"Expected 3 channels (x, y, conf), got {num_channels}"

    # The legacy method needs nothing beyond the image size.
    if method == 'image_center':
        return _denormalize_by_image_center(data, img_width, img_height)

    if method != 'hip_center':
        raise ValueError(
            f"Unknown denormalization method: '{method}'. "
            f"Use 'hip_center' or 'image_center'."
        )

    # hip_center: both saved normalization parameters are mandatory.
    parameters_missing = hip_center is None or skeleton_size is None
    if parameters_missing:
        raise ValueError(
            "hip_center denormalization requires 'hip_center' and 'skeleton_size' parameters. "
            "These values must be saved during normalization. "
            "For visualization without original values, consider using method='image_center'."
        )

    return _denormalize_by_hip_center(data, hip_center, skeleton_size)


def _denormalize_by_hip_center(
    data: np.ndarray,
    hip_center: np.ndarray,
    skeleton_size: float
) -> np.ndarray:
    """
    Reverse hip center normalization.

    Args:
        data: Normalized skeleton data (C, T, V, M)
        hip_center: Original hip center (2, T, 1, M) or (2,) for constant
        skeleton_size: Original skeleton size (scalar)

    Returns:
        denormalized_data: (C, T, V, M) in original pixel coordinates
    """
    C, T, V, M = data.shape
    denormalized_data = data.copy()

    # Reverse scale and centering: x_original = x_normalized * skeleton_size + hip_center
    denormalized_data[:2] = denormalized_data[:2] * skeleton_size + hip_center

    # Confidence channel unchanged

    return denormalized_data


def _denormalize_by_image_center(
    data: np.ndarray,
    img_width: int = 3840,
    img_height: int = 2160
) -> np.ndarray:
    """
    Reverse image center normalization.

    Args:
        data: Normalized skeleton data (C, T, V, M) with x,y in [-0.5, 0.5]
        img_width: Image width in pixels (default: 3840)
        img_height: Image height in pixels (default: 2160)

    Returns:
        denormalized_data: (C, T, V, M) with x,y in pixel coordinates
    """
    C, T, V, M = data.shape
    denormalized_data = data.copy()

    # Denormalize x-coordinate: [-0.5, 0.5] -> [0, img_width]
    denormalized_data[0] = (denormalized_data[0] + 0.5) * img_width

    # Denormalize y-coordinate: [-0.5, 0.5] -> [0, img_height]
    denormalized_data[1] = (denormalized_data[1] + 0.5) * img_height

    # Confidence channel remains unchanged

    return denormalized_data


def test_augmentation():
    """
    Run a printed walkthrough of the normalization and augmentation functions.

    Synthetic pixel-space skeleton data is generated with a fixed seed, then
    each step is exercised and its effect printed: image-center normalization
    with a denormalization round trip, horizontal flip, keypoint swapping,
    Gaussian noise, temporal crop/resize, the full pipeline, and finally rough
    application statistics over 100 runs.

    The walkthrough uses the 'image_center' method explicitly: it is the only
    one that can be round-tripped without saved parameters, and it puts x,y in
    [-0.5, 0.5], the range the default noise level is calibrated for.
    """
    print("Skeleton Data Augmentation Test")
    print("=" * 80)

    # Create synthetic skeleton data (C, T, V, M)
    C, T, V, M = 3, 60, 17, 1
    np.random.seed(42)

    # Generate synthetic data in pixel coordinates
    data = np.random.rand(C, T, V, M)
    data[0] *= 1920  # x in [0, 1920]
    data[1] *= 1080  # y in [0, 1080]
    data[2] = np.random.uniform(0.5, 1.0, (T, V, M))  # confidence in [0.5, 1.0]

    print(f"\nOriginal data shape: {data.shape}")
    print(f"Original x range: [{data[0].min():.2f}, {data[0].max():.2f}] pixels")
    print(f"Original y range: [{data[1].min():.2f}, {data[1].max():.2f}] pixels")
    print(f"Original confidence range: [{data[2].min():.3f}, {data[2].max():.3f}]")

    # Test 1: Normalization
    print("\n" + "-" * 80)
    print("Test 1: Normalization")
    print("-" * 80)
    # The method must be named explicitly: the default is 'hip_center', which
    # ignores the image size and cannot be reversed without saved parameters.
    normalized = normalize_skeleton(
        data, method='image_center', img_width=1920, img_height=1080
    )
    print(f"Normalized x range: [{normalized[0].min():.3f}, {normalized[0].max():.3f}]")
    print(f"Normalized y range: [{normalized[1].min():.3f}, {normalized[1].max():.3f}]")
    print(f"Normalized confidence range: [{normalized[2].min():.3f}, {normalized[2].max():.3f}]")

    # Verify denormalization (same method as above, otherwise this call raises)
    denormalized = denormalize_skeleton(
        normalized, method='image_center', img_width=1920, img_height=1080
    )
    reconstruction_error = np.abs(data - denormalized).max()
    print(f"Denormalization reconstruction error: {reconstruction_error:.6f} pixels")

    # Test 2: Horizontal Flip
    print("\n" + "-" * 80)
    print("Test 2: Horizontal Flip")
    print("-" * 80)
    np.random.seed(42)
    flipped = augment_skeleton(normalized, prob=1.0)  # Force all augmentations
    print(f"Original x (frame 0, keypoint 0): {normalized[0, 0, 0, 0]:.3f}")
    print(f"After augmentation x: {flipped[0, 0, 0, 0]:.3f}")
    print(f"X-coordinate sign flipped: {np.sign(normalized[0].mean()) != np.sign(flipped[0].mean())}")

    # Test 3: Check left/right keypoint swapping
    print("\n" + "-" * 80)
    print("Test 3: Keypoint Pair Swapping (Horizontal Flip)")
    print("-" * 80)
    # Create data with distinctive values for left/right pairs. The two values
    # are deliberately NOT mirror images of each other: with +100 / -100 a flip
    # that did nothing at all would produce the same result as a correct one.
    test_data = np.zeros((3, 60, 17, 1))
    test_data[0, :, 5, 0] = 100   # left_shoulder x = 100
    test_data[0, :, 6, 0] = -50   # right_shoulder x = -50
    flipped_test = _horizontal_flip(test_data)
    print(f"Original left_shoulder (idx 5) x: {test_data[0, 0, 5, 0]:.1f}")
    print(f"Original right_shoulder (idx 6) x: {test_data[0, 0, 6, 0]:.1f}")
    print(f"Flipped left_shoulder (idx 5) x: {flipped_test[0, 0, 5, 0]:.1f}")
    print(f"Flipped right_shoulder (idx 6) x: {flipped_test[0, 0, 6, 0]:.1f}")
    # Negation gives (-100, 50); swapping the pair then gives idx 5 = 50, idx 6 = -100.
    print(f"Swap successful: {flipped_test[0, 0, 5, 0] == 50 and flipped_test[0, 0, 6, 0] == -100}")

    # Test 4: Gaussian Noise
    print("\n" + "-" * 80)
    print("Test 4: Gaussian Noise")
    print("-" * 80)
    np.random.seed(42)
    noisy = _add_gaussian_noise(normalized, std=0.01)
    noise_magnitude = np.abs(noisy[:2] - normalized[:2]).max()
    confidence_unchanged = np.allclose(noisy[2], normalized[2])
    print(f"Max noise magnitude (x,y): {noise_magnitude:.4f}")
    print(f"Confidence channel unchanged: {confidence_unchanged}")

    # Test 5: Temporal Crop and Resize
    print("\n" + "-" * 80)
    print("Test 5: Temporal Crop and Resize")
    print("-" * 80)
    np.random.seed(42)
    cropped = _temporal_crop_resize(normalized, crop_ratio_range=(0.8, 1.0))
    print(f"Original temporal length: {normalized.shape[1]}")
    print(f"Cropped temporal length: {cropped.shape[1]}")
    print(f"Shape preserved: {cropped.shape == normalized.shape}")

    # Test 6: Full Augmentation Pipeline
    print("\n" + "-" * 80)
    print("Test 6: Full Augmentation Pipeline")
    print("-" * 80)
    np.random.seed(42)
    augmented = augment_skeleton(normalized, prob=0.5)
    print(f"Augmented shape: {augmented.shape}")
    print(f"Augmented x range: [{augmented[0].min():.3f}, {augmented[0].max():.3f}]")
    print(f"Augmented y range: [{augmented[1].min():.3f}, {augmented[1].max():.3f}]")
    print(f"Augmented confidence range: [{augmented[2].min():.3f}, {augmented[2].max():.3f}]")

    # Test 7: Augmentation Statistics (Run 100 times)
    print("\n" + "-" * 80)
    print("Test 7: Augmentation Statistics (100 runs with prob=0.5)")
    print("-" * 80)
    np.random.seed(42)
    augmentation_counts = {"flip": 0, "noise": 0, "crop": 0}
    num_runs = 100

    for _ in range(num_runs):
        original_copy = normalized.copy()
        augmented = augment_skeleton(original_copy, prob=0.5)

        # Detect which augmentations were applied (heuristics)
        x_sign_changed = np.sign(augmented[0].mean()) != np.sign(normalized[0].mean())
        noise_added = not np.allclose(augmented[:2], normalized[:2], atol=1e-4)
        # Crop detection is harder, skip for now

        if x_sign_changed:
            augmentation_counts["flip"] += 1
        # NOTE: this counter only covers runs WITHOUT a flip, and a temporal
        # crop also trips `noise_added`, so it is a rough indicator rather than
        # a clean estimate of how often noise was applied.
        if noise_added and not x_sign_changed:
            augmentation_counts["noise"] += 1

    print(f"Horizontal flip applied: {augmentation_counts['flip']}/{num_runs} times")
    print(f"Gaussian noise applied: {augmentation_counts['noise']}/{num_runs} times")
    print(f"Expected frequency (prob=0.5): ~50 times per augmentation")

    print("\n" + "=" * 80)
    print("All tests completed successfully")
    print("=" * 80)


# Run the printed self-test walkthrough only when this file is executed
# directly as a script; importing the module does not trigger it.
if __name__ == "__main__":
    test_augmentation()