File size: 26,955 Bytes
8133f1d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
#!/usr/bin/env python3
"""
Skeleton Data Augmentation for ST-GCN Fall Detection

This module provides augmentation strategies for skeleton sequence data to improve
model generalization and robustness. All augmentations preserve the spatial-temporal
structure required by ST-GCN while introducing controlled variations.

Input Format: (C, T, V, M) where
    C = 3 channels (x, y, confidence)
    T = 60 frames (temporal window)
    V = 17 keypoints (COCO skeleton)
    M = 1 person (max persons tracked)

Augmentation Strategies:
1. Horizontal Flip: Mirror skeleton across vertical axis with keypoint swapping
2. Gaussian Noise: Add random noise to x,y coordinates (preserves confidence)
3. Temporal Crop: Random crop + resize to simulate variable fall speeds

Reference: Issue #34 - ST-GCN Training Dataset Creation
"""

import numpy as np
from typing import Tuple, Optional


# COCO 17-keypoint left/right pairs for horizontal flip
# Format: (left_index, right_index)
# Index 0 (nose) lies on the body midline and has no mirror partner,
# so it is intentionally absent from this list.
COCO_LEFT_RIGHT_PAIRS = [
    (1, 2),   # left_eye <-> right_eye
    (3, 4),   # left_ear <-> right_ear
    (5, 6),   # left_shoulder <-> right_shoulder
    (7, 8),   # left_elbow <-> right_elbow
    (9, 10),  # left_wrist <-> right_wrist
    (11, 12), # left_hip <-> right_hip
    (13, 14), # left_knee <-> right_knee
    (15, 16), # left_ankle <-> right_ankle
]


def augment_skeleton(data: np.ndarray, prob: float = 0.5) -> np.ndarray:
    """
    Apply random augmentations to skeleton sequence data.

    This function applies three augmentation strategies with probability `prob`:
    1. Horizontal flip with keypoint swapping
    2. Gaussian noise injection to x,y coordinates
    3. Temporal crop and resize

    Mathematical Formulations:
    -------------------------
    1. Horizontal Flip:
        x' = -x
        For each (left, right) keypoint pair: swap(left, right)

    2. Gaussian Noise:
        x' = x + N(0, sigma^2)
        y' = y + N(0, sigma^2)
        where N(0, sigma^2) ~ Normal(mean=0, std=0.01)

    3. Temporal Crop & Resize:
        T_crop ~ Uniform(0.8 * T, 1.0 * T)
        start_frame ~ Uniform(0, T - T_crop)
        cropped = data[:, start:start+T_crop, :, :]
        resized = interpolate(cropped, T)

    Args:
        data: Skeleton data with shape (C, T, V, M) where
            C = 3 (x, y, confidence)
            T = 60 (number of frames)
            V = 17 (number of keypoints)
            M = 1 (number of persons)
        prob: Probability of applying each augmentation (default: 0.5)

    Returns:
        augmented_data: Augmented skeleton data with same shape (C, T, V, M)

    Example:
        >>> data = np.random.rand(3, 60, 17, 1)
        >>> augmented = augment_skeleton(data, prob=0.5)
        >>> augmented.shape
        (3, 60, 17, 1)
    """
    C, T, V, M = data.shape
    assert C == 3, f"Expected 3 channels (x, y, conf), got {C}"
    assert V == 17, f"Expected 17 COCO keypoints, got {V}"
    assert M == 1, f"Expected max 1 person, got {M}"

    # Create a copy to avoid modifying original data
    augmented_data = data.copy()

    # 1. Horizontal Flip (flip x-coordinate + swap left/right keypoints)
    if np.random.rand() < prob:
        augmented_data = _horizontal_flip(augmented_data)

    # 2. Random Noise Injection (add Gaussian noise to x,y only)
    if np.random.rand() < prob:
        augmented_data = _add_gaussian_noise(augmented_data)

    # 3. Temporal Crop and Resize (crop 0.8-1.0 of length, resize back)
    if np.random.rand() < prob:
        augmented_data = _temporal_crop_resize(augmented_data)

    return augmented_data


def _horizontal_flip(data: np.ndarray) -> np.ndarray:
    """
    Mirror the skeleton across the vertical axis.

    Negates the x channel (channel 0) and exchanges each left/right
    keypoint pair from COCO_LEFT_RIGHT_PAIRS so anatomical sides remain
    consistent after mirroring (y and confidence values travel with their
    keypoint, unchanged).

    Args:
        data: Skeleton data (C, T, V, M)

    Returns:
        New array (C, T, V, M) with the horizontally flipped skeleton.
    """
    mirrored = data.copy()

    # x' = -x (channel 0); y and confidence stay as-is.
    mirrored[0] *= -1.0

    # Swap each left/right pair across all channels at once; the fancy-index
    # read on the right-hand side produces a copy, so this is a safe swap.
    for lhs, rhs in COCO_LEFT_RIGHT_PAIRS:
        mirrored[:, :, [lhs, rhs], :] = mirrored[:, :, [rhs, lhs], :]

    return mirrored


def _add_gaussian_noise(data: np.ndarray, std: float = 0.01) -> np.ndarray:
    """
    Add Gaussian noise to x,y coordinates (preserves confidence channel).

    Mathematical Formulation:
        x' = x + N(0, sigma^2)
        y' = y + N(0, sigma^2)
        conf' = conf (unchanged)
        where sigma = 0.01 (default)

    The noise magnitude is calibrated for normalized coordinates in range [-0.5, 0.5].
    With std=0.01, 99.7% of noise values fall within [-0.03, 0.03] (3-sigma rule).

    Args:
        data: Skeleton data (C, T, V, M)
        std: Standard deviation of Gaussian noise (default: 0.01)

    Returns:
        noisy_data: Data with Gaussian noise added to x,y coordinates
    """
    C, T, V, M = data.shape
    noisy_data = data.copy()

    # Generate Gaussian noise for x,y channels only (not confidence)
    noise_shape = (2, T, V, M)  # Only x,y channels
    noise = np.random.normal(0, std, noise_shape).astype(data.dtype)

    # Add noise to x,y channels (0, 1), leave confidence channel (2) unchanged
    noisy_data[:2] += noise

    return noisy_data


def _temporal_crop_resize(data: np.ndarray, crop_ratio_range: Tuple[float, float] = (0.8, 1.0)) -> np.ndarray:
    """
    Crop a random temporal window and stretch it back to the full length.

    Simulates variable motion speed: cropping 80% of the frames and
    resampling to T frames plays the action ~20% faster; a ratio of 1.0
    leaves timing unchanged.

    Args:
        data: Skeleton data (C, T, V, M)
        crop_ratio_range: (min_ratio, max_ratio) bounds for the window
            length as a fraction of T (default: (0.8, 1.0))

    Returns:
        Temporally augmented array with the original shape (C, T, V, M).
    """
    C, T, V, M = data.shape
    low, high = crop_ratio_range

    # Window length in frames, clamped to at least one frame.
    window = max(1, int(T * np.random.uniform(low, high)))

    # Random placement of the window; only draw when there is slack.
    slack = max(0, T - window)
    begin = np.random.randint(0, slack + 1) if slack > 0 else 0

    # Crop, then linearly resample back to T frames.
    return _temporal_interpolate(data[:, begin:begin + window, :, :], T)


def _temporal_interpolate(data: np.ndarray, target_length: int) -> np.ndarray:
    """
    Interpolate temporal dimension to target length using linear interpolation.

    This function performs 1D linear interpolation along the temporal axis (axis=1)
    for each channel, keypoint, and person independently.

    Args:
        data: Skeleton data (C, T, V, M)
        target_length: Target number of frames

    Returns:
        interpolated_data: Data with temporal dimension resized to target_length
    """
    C, T_src, V, M = data.shape

    if T_src == target_length:
        return data

    # Create target time indices
    src_indices = np.linspace(0, T_src - 1, T_src)
    target_indices = np.linspace(0, T_src - 1, target_length)

    # Interpolate each channel, keypoint, person combination
    interpolated_data = np.zeros((C, target_length, V, M), dtype=data.dtype)

    for c in range(C):
        for v in range(V):
            for m in range(M):
                interpolated_data[c, :, v, m] = np.interp(
                    target_indices,
                    src_indices,
                    data[c, :, v, m]
                )

    return interpolated_data


def _normalize_by_hip_center(data: np.ndarray) -> np.ndarray:
    """
    Normalize skeleton by hip center position and skeleton size (ST-GCN standard).

    This is the recommended normalization method for skeleton-based action recognition,
    following the ST-GCN paper and NTU RGB+D dataset preprocessing.

    Algorithm:
    ----------
    1. Calculate hip center from left_hip (11) and right_hip (12)
    2. If hips have low confidence (<0.3), fallback to shoulder center
    3. Center all keypoints by subtracting hip center
    4. Calculate skeleton size as average shoulder-to-hip distance
    5. Scale all coordinates by skeleton size

    COCO Keypoints Used:
    - 5: left_shoulder
    - 6: right_shoulder
    - 11: left_hip
    - 12: right_hip

    Args:
        data: Skeleton data (C, T, V, M) with C=3 (x, y, conf)

    Returns:
        normalized_data: (C, T, V, M) centered at hip, scaled by skeleton size
            - x,y channels: relative to hip center, scaled by skeleton size
            - conf channel: unchanged

    Example:
        >>> data = np.random.rand(3, 60, 17, 1) * [3840, 2160, 1]
        >>> normalized = _normalize_by_hip_center(data)
        >>> # Hip center is now at (0, 0)
        >>> hip_center_x = (normalized[0, :, 11, :] + normalized[0, :, 12, :]) / 2
        >>> np.allclose(hip_center_x, 0.0, atol=1e-6)
        True
    """
    C, T, V, M = data.shape
    normalized_data = data.copy()

    # Extract hip keypoints (COCO: 11=left_hip, 12=right_hip)
    left_hip_xy = data[:2, :, 11:12, :]    # (2, T, 1, M)
    right_hip_xy = data[:2, :, 12:13, :]   # (2, T, 1, M)
    left_hip_conf = data[2:3, :, 11:12, :] # (1, T, 1, M)
    right_hip_conf = data[2:3, :, 12:13, :]# (1, T, 1, M)

    # Calculate average hip confidence across all frames
    left_hip_conf_mean = np.mean(left_hip_conf)
    right_hip_conf_mean = np.mean(right_hip_conf)

    # Determine center point (hip or shoulder fallback)
    if left_hip_conf_mean >= 0.3 and right_hip_conf_mean >= 0.3:
        # Normal case: Use hip center
        center_point = (left_hip_xy + right_hip_xy) / 2.0  # (2, T, 1, M)

        # Calculate skeleton size from shoulder-to-hip distance
        left_shoulder_xy = data[:2, :, 5:6, :]  # (2, T, 1, M)
        right_shoulder_xy = data[:2, :, 6:7, :] # (2, T, 1, M)

        # Left torso distance: ||left_shoulder - left_hip||
        left_torso = left_shoulder_xy - left_hip_xy  # (2, T, 1, M)
        left_torso_dist = np.sqrt(np.sum(left_torso ** 2, axis=0))  # (T, 1, M)

        # Right torso distance: ||right_shoulder - right_hip||
        right_torso = right_shoulder_xy - right_hip_xy  # (2, T, 1, M)
        right_torso_dist = np.sqrt(np.sum(right_torso ** 2, axis=0))  # (T, 1, M)

        # Average skeleton size across frames and left/right
        skeleton_size = np.mean([left_torso_dist, right_torso_dist])  # scalar

    else:
        # Fallback: Use shoulder center if hips not detected
        left_shoulder_xy = data[:2, :, 5:6, :]
        right_shoulder_xy = data[:2, :, 6:7, :]
        center_point = (left_shoulder_xy + right_shoulder_xy) / 2.0  # (2, T, 1, M)

        # Use shoulder width as skeleton size estimate
        shoulder_vector = right_shoulder_xy - left_shoulder_xy  # (2, T, 1, M)
        shoulder_width = np.sqrt(np.sum(shoulder_vector ** 2, axis=0))  # (T, 1, M)
        skeleton_size = np.mean(shoulder_width) * 2.0  # Approximate torso height

    # Prevent division by zero
    skeleton_size = max(skeleton_size, 1e-6)

    # Normalize x,y channels: center and scale
    normalized_data[:2] = (normalized_data[:2] - center_point) / skeleton_size

    # Confidence channel unchanged
    # normalized_data[2] remains as is

    return normalized_data


def _normalize_by_image_center(
    data: np.ndarray,
    img_width: int = 3840,
    img_height: int = 2160
) -> np.ndarray:
    """
    Legacy normalization by image center (for comparison only).

    This method is NOT recommended for ST-GCN training as it:
    - Includes absolute position information
    - Varies with camera angle
    - Does not normalize body size

    Use this only for comparing with old implementations or specific use cases
    where absolute position in frame matters.

    Args:
        data: Skeleton data (C, T, V, M)
        img_width: Image width in pixels (default: 3840 for AI Hub 4K)
        img_height: Image height in pixels (default: 2160 for AI Hub 4K)

    Returns:
        normalized_data: (C, T, V, M) with x,y in [-0.5, 0.5]
    """
    C, T, V, M = data.shape
    normalized_data = data.copy()

    # Normalize x-coordinate (channel 0): [0, img_width] -> [-0.5, 0.5]
    normalized_data[0] = (normalized_data[0] / img_width) - 0.5

    # Normalize y-coordinate (channel 1): [0, img_height] -> [-0.5, 0.5]
    normalized_data[1] = (normalized_data[1] / img_height) - 0.5

    # Confidence channel (2) remains unchanged in [0, 1]

    return normalized_data


def normalize_skeleton(
    data: np.ndarray,
    method: str = 'hip_center',
    img_width: int = 3840,
    img_height: int = 2160
) -> np.ndarray:
    """
    Normalize skeleton coordinates for ST-GCN training.

    Removes absolute position so the model learns relative pose patterns —
    important for fall detection across different camera angles (AI Hub
    8-camera setup).

    Methods:
    --------
    1. 'hip_center' (default, ST-GCN standard):
       center at the hip midpoint, scale by shoulder-to-hip distance,
       with a shoulder-center fallback when hips are low-confidence.
       Reference: ST-GCN (Yan et al., AAAI 2018), NTU RGB+D preprocessing.
       hip_center = (left_hip + right_hip) / 2        (COCO 11, 12)
       x'' = (x - hip_center_x) / skeleton_size
       y'' = (y - hip_center_y) / skeleton_size
       where skeleton_size = mean(||shoulder - hip||) over left/right.

    2. 'image_center' (legacy, not recommended):
       center at the image center, scale by image dimensions; use only
       for comparison with old implementations.

    Why hip_center: camera-angle invariant, position independent, body
    size normalized, and consistent with the ST-GCN literature.

    Args:
        data: Skeleton data (C, T, V, M) with C=3 (x px, y px, conf),
            V=17 COCO keypoints, M=1 person.
        method: 'hip_center' (default) or 'image_center'
        img_width: Image width for image_center method (default: 3840)
        img_height: Image height for image_center method (default: 2160)

    Returns:
        Normalized skeleton data (C, T, V, M).
        hip_center: coordinates relative to hip, scaled by skeleton size.
        image_center: x,y in [-0.5, 0.5], conf in [0, 1].

    Raises:
        ValueError: If ``method`` is not a recognized normalization method.

    Example:
        >>> data = np.random.rand(3, 60, 17, 1) * [3840, 2160, 1]
        >>> normalized = normalize_skeleton(data, method='hip_center')
        >>> legacy = normalize_skeleton(data, method='image_center')
        >>> legacy[0].min(), legacy[0].max()  # x range
        (-0.5, 0.5)
    """
    C, T, V, M = data.shape
    assert C == 3, f"Expected 3 channels (x, y, conf), got {C}"
    assert V == 17, f"Expected 17 COCO keypoints, got {V}"

    # Guard-clause dispatch to the concrete normalizer.
    if method == 'hip_center':
        return _normalize_by_hip_center(data)
    if method == 'image_center':
        return _normalize_by_image_center(data, img_width, img_height)

    raise ValueError(
        f"Unknown normalization method: '{method}'. "
        f"Use 'hip_center' (ST-GCN standard) or 'image_center' (legacy)."
    )


def denormalize_skeleton(
    data: np.ndarray,
    method: str = 'hip_center',
    hip_center: Optional[np.ndarray] = None,
    skeleton_size: Optional[float] = None,
    img_width: int = 3840,
    img_height: int = 2160
) -> np.ndarray:
    """
    Map normalized skeleton coordinates back to the original space.

    Primarily for visualization. Most ST-GCN workflows never need this:
    training runs on normalized coordinates and the model outputs class
    labels, not coordinates.

    NOTE: the 'hip_center' method is only invertible if the original
    hip_center and skeleton_size were stored at normalization time;
    'image_center' needs only the image dimensions.

    Args:
        data: Normalized skeleton data (C, T, V, M)
        method: 'hip_center' or 'image_center'
        hip_center: Original hip center (2, T, 1, M) — required for
            the hip_center method
        skeleton_size: Original skeleton size (scalar) — required for
            the hip_center method
        img_width: Image width for image_center method (default: 3840)
        img_height: Image height for image_center method (default: 2160)

    Returns:
        Skeleton data in the original coordinate space (C, T, V, M).

    Raises:
        ValueError: If method is 'hip_center' without hip_center /
            skeleton_size, or if method is unrecognized.

    Example:
        >>> data_original = np.random.rand(3, 60, 17, 1) * [3840, 2160, 1]
        >>> normalized = normalize_skeleton(data_original, method='image_center')
        >>> denormalized = denormalize_skeleton(normalized, method='image_center')
        >>> np.allclose(data_original[:2], denormalized[:2], atol=1.0)
        True
    """
    C, T, V, M = data.shape
    assert C == 3, f"Expected 3 channels (x, y, conf), got {C}"

    if method == 'hip_center':
        # hip_center inversion needs the values captured at normalize time.
        if hip_center is None or skeleton_size is None:
            raise ValueError(
                "hip_center denormalization requires 'hip_center' and 'skeleton_size' parameters. "
                "These values must be saved during normalization. "
                "For visualization without original values, consider using method='image_center'."
            )
        return _denormalize_by_hip_center(data, hip_center, skeleton_size)

    if method == 'image_center':
        return _denormalize_by_image_center(data, img_width, img_height)

    raise ValueError(
        f"Unknown denormalization method: '{method}'. "
        f"Use 'hip_center' or 'image_center'."
    )


def _denormalize_by_hip_center(
    data: np.ndarray,
    hip_center: np.ndarray,
    skeleton_size: float
) -> np.ndarray:
    """
    Reverse hip center normalization.

    Args:
        data: Normalized skeleton data (C, T, V, M)
        hip_center: Original hip center (2, T, 1, M) or (2,) for constant
        skeleton_size: Original skeleton size (scalar)

    Returns:
        denormalized_data: (C, T, V, M) in original pixel coordinates
    """
    C, T, V, M = data.shape
    denormalized_data = data.copy()

    # Reverse scale and centering: x_original = x_normalized * skeleton_size + hip_center
    denormalized_data[:2] = denormalized_data[:2] * skeleton_size + hip_center

    # Confidence channel unchanged

    return denormalized_data


def _denormalize_by_image_center(
    data: np.ndarray,
    img_width: int = 3840,
    img_height: int = 2160
) -> np.ndarray:
    """
    Reverse image center normalization.

    Args:
        data: Normalized skeleton data (C, T, V, M) with x,y in [-0.5, 0.5]
        img_width: Image width in pixels (default: 3840)
        img_height: Image height in pixels (default: 2160)

    Returns:
        denormalized_data: (C, T, V, M) with x,y in pixel coordinates
    """
    C, T, V, M = data.shape
    denormalized_data = data.copy()

    # Denormalize x-coordinate: [-0.5, 0.5] -> [0, img_width]
    denormalized_data[0] = (denormalized_data[0] + 0.5) * img_width

    # Denormalize y-coordinate: [-0.5, 0.5] -> [0, img_height]
    denormalized_data[1] = (denormalized_data[1] + 0.5) * img_height

    # Confidence channel remains unchanged

    return denormalized_data


def test_augmentation():
    """
    Smoke-test the augmentation and normalization functions end to end.

    Creates synthetic skeleton data, applies each augmentation, and prints
    summaries to verify correctness.

    Bug fix: Test 1 previously called normalize_skeleton / denormalize_skeleton
    with only img_width/img_height, leaving the default method='hip_center'.
    hip_center ignores the image dimensions, and denormalize_skeleton with
    method='hip_center' raises ValueError unless hip_center and skeleton_size
    are supplied — so the test crashed. Both calls now explicitly use
    method='image_center', which is the invertible method this test exercises.
    """
    print("Skeleton Data Augmentation Test")
    print("=" * 80)

    # Create synthetic skeleton data (C, T, V, M)
    C, T, V, M = 3, 60, 17, 1
    np.random.seed(42)

    # Generate synthetic data in pixel coordinates
    data = np.random.rand(C, T, V, M)
    data[0] *= 1920  # x in [0, 1920]
    data[1] *= 1080  # y in [0, 1080]
    data[2] = np.random.uniform(0.5, 1.0, (T, V, M))  # confidence in [0.5, 1.0]

    print(f"\nOriginal data shape: {data.shape}")
    print(f"Original x range: [{data[0].min():.2f}, {data[0].max():.2f}] pixels")
    print(f"Original y range: [{data[1].min():.2f}, {data[1].max():.2f}] pixels")
    print(f"Original confidence range: [{data[2].min():.3f}, {data[2].max():.3f}]")

    # Test 1: Normalization (image_center — the invertible method)
    print("\n" + "-" * 80)
    print("Test 1: Normalization")
    print("-" * 80)
    normalized = normalize_skeleton(data, method='image_center', img_width=1920, img_height=1080)
    print(f"Normalized x range: [{normalized[0].min():.3f}, {normalized[0].max():.3f}]")
    print(f"Normalized y range: [{normalized[1].min():.3f}, {normalized[1].max():.3f}]")
    print(f"Normalized confidence range: [{normalized[2].min():.3f}, {normalized[2].max():.3f}]")

    # Verify denormalization round-trips within floating-point error
    denormalized = denormalize_skeleton(normalized, method='image_center', img_width=1920, img_height=1080)
    reconstruction_error = np.abs(data - denormalized).max()
    print(f"Denormalization reconstruction error: {reconstruction_error:.6f} pixels")

    # Test 2: Horizontal Flip
    print("\n" + "-" * 80)
    print("Test 2: Horizontal Flip")
    print("-" * 80)
    np.random.seed(42)
    flipped = augment_skeleton(normalized, prob=1.0)  # Force all augmentations
    print(f"Original x (frame 0, keypoint 0): {normalized[0, 0, 0, 0]:.3f}")
    print(f"After augmentation x: {flipped[0, 0, 0, 0]:.3f}")
    print(f"X-coordinate sign flipped: {np.sign(normalized[0].mean()) != np.sign(flipped[0].mean())}")

    # Test 3: Check left/right keypoint swapping
    print("\n" + "-" * 80)
    print("Test 3: Keypoint Pair Swapping (Horizontal Flip)")
    print("-" * 80)
    # Create data with distinctive values for left/right pairs
    test_data = np.zeros((3, 60, 17, 1))
    test_data[0, :, 5, 0] = 100   # left_shoulder x = 100
    test_data[0, :, 6, 0] = -100  # right_shoulder x = -100
    flipped_test = _horizontal_flip(test_data)
    print(f"Original left_shoulder (idx 5) x: {test_data[0, 0, 5, 0]:.1f}")
    print(f"Original right_shoulder (idx 6) x: {test_data[0, 0, 6, 0]:.1f}")
    print(f"Flipped left_shoulder (idx 5) x: {flipped_test[0, 0, 5, 0]:.1f}")
    print(f"Flipped right_shoulder (idx 6) x: {flipped_test[0, 0, 6, 0]:.1f}")
    print(f"Swap successful: {flipped_test[0, 0, 5, 0] == 100 and flipped_test[0, 0, 6, 0] == -100}")

    # Test 4: Gaussian Noise
    print("\n" + "-" * 80)
    print("Test 4: Gaussian Noise")
    print("-" * 80)
    np.random.seed(42)
    noisy = _add_gaussian_noise(normalized, std=0.01)
    noise_magnitude = np.abs(noisy[:2] - normalized[:2]).max()
    confidence_unchanged = np.allclose(noisy[2], normalized[2])
    print(f"Max noise magnitude (x,y): {noise_magnitude:.4f}")
    print(f"Confidence channel unchanged: {confidence_unchanged}")

    # Test 5: Temporal Crop and Resize
    print("\n" + "-" * 80)
    print("Test 5: Temporal Crop and Resize")
    print("-" * 80)
    np.random.seed(42)
    cropped = _temporal_crop_resize(normalized, crop_ratio_range=(0.8, 1.0))
    print(f"Original temporal length: {normalized.shape[1]}")
    print(f"Cropped temporal length: {cropped.shape[1]}")
    print(f"Shape preserved: {cropped.shape == normalized.shape}")

    # Test 6: Full Augmentation Pipeline
    print("\n" + "-" * 80)
    print("Test 6: Full Augmentation Pipeline")
    print("-" * 80)
    np.random.seed(42)
    augmented = augment_skeleton(normalized, prob=0.5)
    print(f"Augmented shape: {augmented.shape}")
    print(f"Augmented x range: [{augmented[0].min():.3f}, {augmented[0].max():.3f}]")
    print(f"Augmented y range: [{augmented[1].min():.3f}, {augmented[1].max():.3f}]")
    print(f"Augmented confidence range: [{augmented[2].min():.3f}, {augmented[2].max():.3f}]")

    # Test 7: Augmentation Statistics (Run 100 times)
    print("\n" + "-" * 80)
    print("Test 7: Augmentation Statistics (100 runs with prob=0.5)")
    print("-" * 80)
    np.random.seed(42)
    augmentation_counts = {"flip": 0, "noise": 0, "crop": 0}
    num_runs = 100

    for _ in range(num_runs):
        original_copy = normalized.copy()
        augmented = augment_skeleton(original_copy, prob=0.5)

        # Detect which augmentations were applied (heuristics)
        x_sign_changed = np.sign(augmented[0].mean()) != np.sign(normalized[0].mean())
        noise_added = not np.allclose(augmented[:2], normalized[:2], atol=1e-4)
        # Crop detection is harder, skip for now

        if x_sign_changed:
            augmentation_counts["flip"] += 1
        if noise_added and not x_sign_changed:
            augmentation_counts["noise"] += 1

    print(f"Horizontal flip applied: {augmentation_counts['flip']}/{num_runs} times")
    print(f"Gaussian noise applied: {augmentation_counts['noise']}/{num_runs} times")
    print(f"Expected frequency (prob=0.5): ~50 times per augmentation")

    print("\n" + "=" * 80)
    print("All tests completed successfully")
    print("=" * 80)


# Run the self-test/demo suite when executed directly as a script.
if __name__ == "__main__":
    test_augmentation()