Spaces:

YoungjaeDev
/

fall-detection-demo

Sleeping

File size: 5,125 Bytes

8133f1d

"""
YOLOv11-Pose 래퍼 클래스

실시간 pose estimation을 위한 YOLOv11-Pose 모델 래퍼입니다.
"""

import logging
from typing import Optional

import numpy as np
import torch
from ultralytics import YOLO


class PoseEstimator:
    """YOLOv11-Pose 기반 포즈 추정기"""

    def __init__(
        self,
        model_path: str = "yolo11m-pose.pt",
        conf_threshold: float = 0.5,
        imgsz: int = 640,
        device: str = "cuda:0",
        logger: Optional[logging.Logger] = None
    ):
        """
        Args:
            model_path: YOLOv11-Pose 모델 경로
            conf_threshold: 감지 신뢰도 임계값
            imgsz: 입력 이미지 크기
            device: 디바이스 (cuda:0, cpu 등)
            logger: 로거 인스턴스
        """
        self.device = torch.device(device if torch.cuda.is_available() else "cpu")
        self.conf_threshold = conf_threshold
        self.imgsz = imgsz
        self.logger = logger or logging.getLogger(__name__)

        # 모델 로드
        self.logger.info(f"[Stage 1] YOLOv11-Pose 로드 중: {model_path}")
        self.model = YOLO(model_path)
        self.model.to(self.device)
        self.logger.info(f"  - Confidence threshold: {conf_threshold}")
        self.logger.info(f"  - Image size: {imgsz}")
        self.logger.info(f"  - Device: {self.device}")

    def extract(self, frame: np.ndarray, debug: bool = False) -> Optional[np.ndarray]:
        """
        프레임에서 pose keypoints 추출

        Args:
            frame: OpenCV 이미지 (H, W, 3)
            debug: 디버그 로그 출력 여부

        Returns:
            keypoints: (17, 3) numpy array 또는 None (사람이 감지되지 않은 경우)
                       각 keypoint는 (x, y, confidence) 형태
        """
        results = self.model.predict(
            frame,
            imgsz=self.imgsz,
            conf=self.conf_threshold,
            verbose=False
        )

        if results and len(results) > 0 and results[0].keypoints is not None:
            keypoints_data = results[0].keypoints.data.cpu().numpy()

            if len(keypoints_data) > 0:
                # 가장 신뢰도 높은 사람 선택
                if results[0].boxes is not None:
                    confidences = results[0].boxes.conf.cpu().numpy()
                    best_idx = np.argmax(confidences)
                    keypoints = keypoints_data[best_idx]  # (17, 3)
                else:
                    keypoints = keypoints_data[0]

                if debug:
                    avg_conf = keypoints[:, 2].mean()
                    self.logger.debug(f"  Pose detected: avg_conf={avg_conf:.3f}")

                return keypoints

        if debug:
            self.logger.debug("  No pose detected")

        return None

    def extract_batch(
        self, frames: list[np.ndarray] | np.ndarray, debug: bool = False
    ) -> list[Optional[np.ndarray]]:
        """
        여러 프레임에서 배치로 pose keypoints 추출 (GPU 활용 극대화)

        Args:
            frames: OpenCV 이미지 리스트 [(H, W, 3), ...] 또는 numpy 배열 (N, H, W, C)
            debug: 디버그 로그 출력 여부

        Returns:
            keypoints_list: [(17, 3) numpy array or None, ...] 각 프레임별 keypoints
        """
        # 빈 입력 체크 (리스트와 numpy 배열 모두 지원)
        if isinstance(frames, np.ndarray):
            if frames.size == 0:
                return []
            # numpy 배열을 리스트로 변환
            frames = list(frames)
        elif not frames:
            return []

        # YOLO 배치 추론
        results = self.model.predict(
            frames,
            imgsz=self.imgsz,
            conf=self.conf_threshold,
            verbose=False
        )

        keypoints_list = []
        for i, result in enumerate(results):
            if result.keypoints is not None:
                keypoints_data = result.keypoints.data.cpu().numpy()

                if len(keypoints_data) > 0:
                    # 가장 신뢰도 높은 사람 선택
                    if result.boxes is not None:
                        confidences = result.boxes.conf.cpu().numpy()
                        best_idx = np.argmax(confidences)
                        keypoints = keypoints_data[best_idx]  # (17, 3)
                    else:
                        keypoints = keypoints_data[0]

                    if debug:
                        avg_conf = keypoints[:, 2].mean()
                        self.logger.debug(
                            f"  Batch[{i}] Pose detected: avg_conf={avg_conf:.3f}"
                        )

                    keypoints_list.append(keypoints)
                    continue

            if debug:
                self.logger.debug(f"  Batch[{i}] No pose detected")
            keypoints_list.append(None)

        return keypoints_list

    def get_empty_keypoints(self) -> np.ndarray:
        """빈 keypoints 배열 반환 (사람이 감지되지 않은 경우 사용)"""
        return np.zeros((17, 3), dtype=np.float32)