fall-detection-demo / models /pose_estimator.py
YoungjaeDev
fix: HF Spaces import 에러 해결 - self-contained 구조로 변경
8133f1d
raw
history blame
5.13 kB
"""
YOLOv11-Pose ๋ž˜ํผ ํด๋ž˜์Šค
์‹ค์‹œ๊ฐ„ pose estimation์„ ์œ„ํ•œ YOLOv11-Pose ๋ชจ๋ธ ๋ž˜ํผ์ž…๋‹ˆ๋‹ค.
"""
import logging
from typing import Optional
import numpy as np
import torch
from ultralytics import YOLO
class PoseEstimator:
"""YOLOv11-Pose ๊ธฐ๋ฐ˜ ํฌ์ฆˆ ์ถ”์ •๊ธฐ"""
def __init__(
self,
model_path: str = "yolo11m-pose.pt",
conf_threshold: float = 0.5,
imgsz: int = 640,
device: str = "cuda:0",
logger: Optional[logging.Logger] = None
):
"""
Args:
model_path: YOLOv11-Pose ๋ชจ๋ธ ๊ฒฝ๋กœ
conf_threshold: ๊ฐ์ง€ ์‹ ๋ขฐ๋„ ์ž„๊ณ„๊ฐ’
imgsz: ์ž…๋ ฅ ์ด๋ฏธ์ง€ ํฌ๊ธฐ
device: ๋””๋ฐ”์ด์Šค (cuda:0, cpu ๋“ฑ)
logger: ๋กœ๊ฑฐ ์ธ์Šคํ„ด์Šค
"""
self.device = torch.device(device if torch.cuda.is_available() else "cpu")
self.conf_threshold = conf_threshold
self.imgsz = imgsz
self.logger = logger or logging.getLogger(__name__)
# ๋ชจ๋ธ ๋กœ๋“œ
self.logger.info(f"[Stage 1] YOLOv11-Pose ๋กœ๋“œ ์ค‘: {model_path}")
self.model = YOLO(model_path)
self.model.to(self.device)
self.logger.info(f" - Confidence threshold: {conf_threshold}")
self.logger.info(f" - Image size: {imgsz}")
self.logger.info(f" - Device: {self.device}")
def extract(self, frame: np.ndarray, debug: bool = False) -> Optional[np.ndarray]:
"""
ํ”„๋ ˆ์ž„์—์„œ pose keypoints ์ถ”์ถœ
Args:
frame: OpenCV ์ด๋ฏธ์ง€ (H, W, 3)
debug: ๋””๋ฒ„๊ทธ ๋กœ๊ทธ ์ถœ๋ ฅ ์—ฌ๋ถ€
Returns:
keypoints: (17, 3) numpy array ๋˜๋Š” None (์‚ฌ๋žŒ์ด ๊ฐ์ง€๋˜์ง€ ์•Š์€ ๊ฒฝ์šฐ)
๊ฐ keypoint๋Š” (x, y, confidence) ํ˜•ํƒœ
"""
results = self.model.predict(
frame,
imgsz=self.imgsz,
conf=self.conf_threshold,
verbose=False
)
if results and len(results) > 0 and results[0].keypoints is not None:
keypoints_data = results[0].keypoints.data.cpu().numpy()
if len(keypoints_data) > 0:
# ๊ฐ€์žฅ ์‹ ๋ขฐ๋„ ๋†’์€ ์‚ฌ๋žŒ ์„ ํƒ
if results[0].boxes is not None:
confidences = results[0].boxes.conf.cpu().numpy()
best_idx = np.argmax(confidences)
keypoints = keypoints_data[best_idx] # (17, 3)
else:
keypoints = keypoints_data[0]
if debug:
avg_conf = keypoints[:, 2].mean()
self.logger.debug(f" Pose detected: avg_conf={avg_conf:.3f}")
return keypoints
if debug:
self.logger.debug(" No pose detected")
return None
def extract_batch(
self, frames: list[np.ndarray] | np.ndarray, debug: bool = False
) -> list[Optional[np.ndarray]]:
"""
์—ฌ๋Ÿฌ ํ”„๋ ˆ์ž„์—์„œ ๋ฐฐ์น˜๋กœ pose keypoints ์ถ”์ถœ (GPU ํ™œ์šฉ ๊ทน๋Œ€ํ™”)
Args:
frames: OpenCV ์ด๋ฏธ์ง€ ๋ฆฌ์ŠคํŠธ [(H, W, 3), ...] ๋˜๋Š” numpy ๋ฐฐ์—ด (N, H, W, C)
debug: ๋””๋ฒ„๊ทธ ๋กœ๊ทธ ์ถœ๋ ฅ ์—ฌ๋ถ€
Returns:
keypoints_list: [(17, 3) numpy array or None, ...] ๊ฐ ํ”„๋ ˆ์ž„๋ณ„ keypoints
"""
# ๋นˆ ์ž…๋ ฅ ์ฒดํฌ (๋ฆฌ์ŠคํŠธ์™€ numpy ๋ฐฐ์—ด ๋ชจ๋‘ ์ง€์›)
if isinstance(frames, np.ndarray):
if frames.size == 0:
return []
# numpy ๋ฐฐ์—ด์„ ๋ฆฌ์ŠคํŠธ๋กœ ๋ณ€ํ™˜
frames = list(frames)
elif not frames:
return []
# YOLO ๋ฐฐ์น˜ ์ถ”๋ก 
results = self.model.predict(
frames,
imgsz=self.imgsz,
conf=self.conf_threshold,
verbose=False
)
keypoints_list = []
for i, result in enumerate(results):
if result.keypoints is not None:
keypoints_data = result.keypoints.data.cpu().numpy()
if len(keypoints_data) > 0:
# ๊ฐ€์žฅ ์‹ ๋ขฐ๋„ ๋†’์€ ์‚ฌ๋žŒ ์„ ํƒ
if result.boxes is not None:
confidences = result.boxes.conf.cpu().numpy()
best_idx = np.argmax(confidences)
keypoints = keypoints_data[best_idx] # (17, 3)
else:
keypoints = keypoints_data[0]
if debug:
avg_conf = keypoints[:, 2].mean()
self.logger.debug(
f" Batch[{i}] Pose detected: avg_conf={avg_conf:.3f}"
)
keypoints_list.append(keypoints)
continue
if debug:
self.logger.debug(f" Batch[{i}] No pose detected")
keypoints_list.append(None)
return keypoints_list
def get_empty_keypoints(self) -> np.ndarray:
"""๋นˆ keypoints ๋ฐฐ์—ด ๋ฐ˜ํ™˜ (์‚ฌ๋žŒ์ด ๊ฐ์ง€๋˜์ง€ ์•Š์€ ๊ฒฝ์šฐ ์‚ฌ์šฉ)"""
return np.zeros((17, 3), dtype=np.float32)