# Scraped file-viewer header: Spaces - Sleeping; file size: 6,079 bytes; revision d2885a7.
import itertools
from pathlib import Path
from typing import Any
import numpy as np
from src.entity.config_entity import MMPoseConfig, PostureModelConfig
from src.utils.helpers import calc_keypoint_angle, normalize_pose_channels
from src.utils.logger import get_logger
class PoseFeatureGenerator:
"""
Convert raw keypoints from MMPose into the structured angle-score tensor
used by your posture classifier.
This is the cleaned version of the logic from:
- keypoint_config.py
- calculations.py
- annotate_image.py
- processing.py
"""
def __init__(
self,
mmpose_config: MMPoseConfig,
posture_model_config: PostureModelConfig,
log_dir: Path | None = None,
log_level: str = "INFO",
) -> None:
self.mmpose_config = mmpose_config
self.posture_model_config = posture_model_config
self.logger = get_logger(
self.__class__.__name__, log_dir=log_dir, level=log_level
)
# These names match the first 13 keypoints used in your current project logic
self.keypoint_names = {
0: "Body-Chin",
1: "Body-Left_eye",
2: "Body-Right_eye",
3: "Body-Left_ear",
4: "Body-Right_ear",
5: "Body-Left_shoulder",
6: "Body-Right_shoulder",
7: "Body-Left_elbow",
8: "Body-Right_elbow",
9: "Body-Left_wrist",
10: "Body-Right_wrist",
11: "Body-Left_hip",
12: "Body-Right_hip",
}
self.keypoint_indexes = {name: idx for idx, name in self.keypoint_names.items()}
self.target_structure = self._get_cube_angles(
use_str=True, num=self.mmpose_config.keypoints.use_first_n_keypoints
)
def _get_cube_angles(
self, use_str: bool = True, num: int = 13
) -> list[list[list[Any]]]:
"""
Rebuild the angle cube structure from the old keypoint_config logic.
"""
key_source = (
self.keypoint_indexes.keys() if use_str else self.keypoint_indexes.values()
)
keys = list(key_source)[:num]
edge_combinations = list(itertools.combinations(keys, 2))
sorted_angles = [
[edge, corner]
for corner in keys
for edge in edge_combinations
if corner not in edge
]
row = num - 1
col = num - 2
depth = (num + 1) // 2
init_value = [("", ""), ""] if use_str else [(0, 0), 0]
cube = [
[[init_value for _ in range(depth)] for _ in range(col)] for _ in range(row)
]
ij_order = [(i, j) for i in range(row) for j in range(i, col)]
ij_order += [(i, j) for j in range(col) for i in range(j + 1, row)]
idx = 0
for k in range(depth):
for i, j in ij_order:
cube[i][j][k] = sorted_angles[idx]
idx = (idx + 1) % len(sorted_angles)
return cube
def translate_one_landmarks(self, landmarks: np.ndarray) -> np.ndarray:
"""
Translate one person's landmarks into 2-channel 3D feature tensor.
Output shape:
(channels=2, height=12, width=11, depth=7)
Channel 0:
- angle values
Channel 1:
- angle scores
"""
num = self.mmpose_config.keypoints.use_first_n_keypoints
row = num - 1
col = num - 2
depth = (num + 1) // 2
angle_channel = np.zeros((row, col, depth), dtype=np.float32)
score_channel = np.zeros((row, col, depth), dtype=np.float32)
for i in range(row):
for j in range(col):
for k in range(depth):
edge_names, middle_name = self.target_structure[i][j][k]
angle_value, angle_score = calc_keypoint_angle(
landmarks=landmarks,
name_to_index=self.keypoint_indexes,
edge_keypoint_names=tuple(edge_names),
mid_keypoint_name=middle_name,
)
angle_channel[i, j, k] = angle_value
score_channel[i, j, k] = angle_score
return np.stack([angle_channel, score_channel], axis=0)
def build_feature_tensor(
self, landmarks: np.ndarray, normalize: bool = True
) -> np.ndarray:
"""
Convert one person's landmarks into model-ready tensor.
Final output shape after transpose:
(1, C, D, H, W)
"""
feature_tensor = self.translate_one_landmarks(landmarks) # (C, H, W, D)
feature_tensor = np.expand_dims(feature_tensor, axis=0) # (N, C, H, W, D)
if (
self.posture_model_config.feature_engineering.normalize_per_channel
and normalize
):
# Convert to N, C, D, H, W before channel normalization logic
feature_tensor = np.transpose(feature_tensor, (0, 1, 4, 2, 3))
feature_tensor = normalize_pose_channels(feature_tensor)
else:
feature_tensor = np.transpose(feature_tensor, (0, 1, 4, 2, 3))
return feature_tensor.astype(np.float32)
def build_batch_feature_tensor(
self, landmarks_batch: np.ndarray, normalize: bool = True
) -> np.ndarray:
"""
Convert many persons into model-ready pose tensor batch.
Input:
- (N, K, 3)
Output:
- (N, C, D, H, W)
"""
batch_features = [
self.translate_one_landmarks(one_person) for one_person in landmarks_batch
]
batch_features = np.stack(batch_features, axis=0) # (N, C, H, W, D)
batch_features = np.transpose(
batch_features, (0, 1, 4, 2, 3)
) # (N, C, D, H, W)
if (
self.posture_model_config.feature_engineering.normalize_per_channel
and normalize
):
batch_features = normalize_pose_channels(batch_features)
return batch_features.astype(np.float32)
|