미디어

by jjunyuongv - opened Nov 25, 2025

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+526

-0

Files changed (6) hide show

.gitattributes +1 -0
mediapipe-endpoint/README.md +137 -0
mediapipe-endpoint/handler.py +377 -0
mediapipe-endpoint/model/efficientnet_lite0.tflite +3 -0
mediapipe-endpoint/model/pose_landmarker_lite.task +3 -0
mediapipe-endpoint/requirements.txt +5 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+mediapipe-endpoint/model/pose_landmarker_lite.task filter=lfs diff=lfs merge=lfs -text

mediapipe-endpoint/README.md ADDED Viewed

	@@ -0,0 +1,137 @@

+# MediaPipe HuggingFace Inference Endpoint
+MediaPipe Pose Landmarker와 Image Classifier를 HuggingFace Inference Endpoint로 제공하는 커스텀 핸들러입니다.
+## 모델 파일
+- `model/pose_landmarker_lite.task`: 포즈 랜드마크 추출 모델
+- `model/efficientnet_lite0.tflite`: 이미지 분류 모델
+## 지원하는 엔드포인트
+### 1. `/extract_landmarks` - 포즈 랜드마크 추출
+이미지에서 33개의 포즈 랜드마크를 추출합니다.
+**요청 형식:**
+```json
+{
+  "endpoint": "/extract_landmarks",
+  "image": "base64_encoded_image_string"
+}
+```
+**응답 형식:**
+```json
+{
+  "success": true,
+  "landmarks": [
+    {
+      "id": 0,
+      "x": 0.5,
+      "y": 0.3,
+      "z": 0.1,
+      "visibility": 0.9
+    },
+    ...
+  ]
+}
+```
+### 2. `/classify_image` - 이미지 분류
+이미지를 ImageNet 1000개 클래스로 분류합니다.
+**요청 형식:**
+```json
+{
+  "endpoint": "/classify_image",
+  "image": "base64_encoded_image_string"
+}
+```
+**응답 형식:**
+```json
+{
+  "success": true,
+  "categories": [
+    {
+      "category_name": "person",
+      "score": 0.95
+    },
+    ...
+  ]
+}
+```
+### 3. `/is_person` - 사람 감지
+이미지에 사람이 있는지 판단합니다.
+**요청 형식:**
+```json
+{
+  "endpoint": "/is_person",
+  "image": "base64_encoded_image_string",
+  "threshold": 0.3
+}
+```
+**응답 형식:**
+```json
+{
+  "success": true,
+  "is_person": true
+}
+```
+## 사용 예시
+### Python 예시
+```python
+import requests
+import base64
+from PIL import Image
+import io
+# 이미지 로드 및 base64 인코딩
+image = Image.open("path/to/image.jpg")
+buffer = io.BytesIO()
+image.save(buffer, format="JPEG")
+image_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
+# 엔드포인트 호출
+endpoint_url = "https://your-endpoint-name.region.vendor.endpoints.huggingface.cloud"  # Inference Endpoint URL (Spaces의 *.hf.space 주소가 아님)
+response = requests.post(
+    endpoint_url,
+    json={
+        "endpoint": "/extract_landmarks",
+        "image": image_base64
+    },
+    headers={
+        "Authorization": "Bearer YOUR_HF_TOKEN"
+    }
+)
+result = response.json()
+print(result)
+```
+## 배포 방법
+1. HuggingFace Hub에 모델 저장소 생성 (예: `jjunyuongv/mediapipe-endpoint`)
+2. 이 폴더의 모든 파일을 저장소에 업로드
+3. HuggingFace Inference Endpoints에서 Custom Endpoint 생성:
+   - Instance: CPU 1 vCPU
+   - Inference Engine: Custom
+   - Authentication: Private
+   - Autoscaling: Min 0 / Max 1
+   - Scale to zero: 1시간
+## 주의사항
+- 이미지는 base64로 인코딩되어 전송되어야 합니다
+- RGB 형식의 이미지를 권장합니다
+- Private Endpoint의 경우 Authorization 헤더에 HuggingFace 토큰이 필요합니다

mediapipe-endpoint/handler.py ADDED Viewed

	@@ -0,0 +1,377 @@

+"""
+HuggingFace Inference Endpoint Handler for MediaPipe Models
+MediaPipe Pose Landmarker와 Image Classifier를 제공하는 커스텀 핸들러
+"""
+import os
+import json
+import base64
+import io
+from typing import Dict, Any, Optional, List
+from pathlib import Path
+import mediapipe as mp
+from mediapipe.tasks import python
+from mediapipe.tasks.python import vision
+import numpy as np
+from PIL import Image
+# Model file locations, resolved relative to the directory containing this file
+MODEL_DIR = Path(__file__).parent / "model"
+POSE_MODEL_PATH = MODEL_DIR / "pose_landmarker_lite.task"
+CLASSIFIER_MODEL_PATH = MODEL_DIR / "efficientnet_lite0.tflite"
+# Module-level model handles; both stay None until init_models() assigns them
+pose_landmarker = None
+image_classifier = None
+def init_models():
+    """모델 초기화"""
+    global pose_landmarker, image_classifier
+    try:
+        # Pose Landmarker 초기화
+        if POSE_MODEL_PATH.exists():
+            base_options = python.BaseOptions(model_asset_path=str(POSE_MODEL_PATH))
+            options = vision.PoseLandmarkerOptions(
+                base_options=base_options,
+                output_segmentation_masks=False,
+                min_pose_detection_confidence=0.5,
+                min_pose_presence_confidence=0.5,
+                min_tracking_confidence=0.5
+            )
+            pose_landmarker = vision.PoseLandmarker.create_from_options(options)
+            print("✅ Pose Landmarker 초기화 완료")
+        else:
+            print(f"⚠️  Pose 모델 파일을 찾을 수 없습니다: {POSE_MODEL_PATH}")
+        # Image Classifier 초기화
+        if CLASSIFIER_MODEL_PATH.exists():
+            base_options = python.BaseOptions(model_asset_path=str(CLASSIFIER_MODEL_PATH))
+            options = vision.ImageClassifierOptions(
+                base_options=base_options,
+                max_results=10,
+                score_threshold=0.1
+            )
+            image_classifier = vision.ImageClassifier.create_from_options(options)
+            print("✅ Image Classifier 초기화 완료")
+        else:
+            print(f"⚠️  Classifier 모델 파일을 찾을 수 없습니다: {CLASSIFIER_MODEL_PATH}")
+    except Exception as e:
+        print(f"❌ 모델 초기화 오류: {e}")
+        import traceback
+        traceback.print_exc()
+def decode_image(image_data: str) -> Image.Image:
+    """
+    base64 인코딩된 이미지 데이터를 PIL Image로 변환
+    Args:
+        image_data: base64 인코딩된 이미지 문자열
+    Returns:
+        PIL Image 객체
+    """
+    try:
+        # base64 디코딩
+        image_bytes = base64.b64decode(image_data)
+        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+        return image
+    except Exception as e:
+        raise ValueError(f"이미지 디코딩 실패: {e}")
+def extract_landmarks_handler(data: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    포즈 랜드마크 추출 핸들러
+    Args:
+        data: {
+            "image": base64 인코딩된 이미지 문자열
+        }
+    Returns:
+        {
+            "success": bool,
+            "landmarks": List[Dict] 또는 None,
+            "error": str (실패 시)
+        }
+    """
+    global pose_landmarker
+    if pose_landmarker is None:
+        return {
+            "success": False,
+            "error": "Pose Landmarker가 초기화되지 않았습니다."
+        }
+    try:
+        # 이미지 디코딩
+        image_data = data.get("image")
+        if not image_data:
+            return {
+                "success": False,
+                "error": "이미지 데이터가 제공되지 않았습니다."
+            }
+        image = decode_image(image_data)
+        # PIL Image를 numpy array로 변환
+        image_array = np.array(image)
+        # RGB 형식으로 변환
+        if len(image_array.shape) == 3 and image_array.shape[2] == 4:
+            image_array = image_array[:, :, :3]
+        # MediaPipe 형식으로 변환
+        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image_array)
+        # 랜드마크 추출
+        detection_result = pose_landmarker.detect(mp_image)
+        # 랜드마크가 없으면 None 반환
+        if not detection_result.pose_landmarks:
+            return {
+                "success": True,
+                "landmarks": None
+            }
+        # 첫 번째 포즈의 랜드마크 사용
+        pose_landmarks = detection_result.pose_landmarks[0]
+        # 랜드마크를 딕셔너리 리스트로 변환
+        landmarks = []
+        for idx, landmark in enumerate(pose_landmarks):
+            landmarks.append({
+                "id": idx,
+                "x": float(landmark.x),
+                "y": float(landmark.y),
+                "z": float(landmark.z),
+                "visibility": float(landmark.visibility)
+            })
+        return {
+            "success": True,
+            "landmarks": landmarks
+        }
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        return {
+            "success": False,
+            "error": str(e)
+        }
+def classify_image_handler(data: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    이미지 분류 핸들러
+    Args:
+        data: {
+            "image": base64 인코딩된 이미지 문자열
+        }
+    Returns:
+        {
+            "success": bool,
+            "categories": List[Dict] 또는 None,
+            "error": str (실패 시)
+        }
+    """
+    global image_classifier
+    if image_classifier is None:
+        return {
+            "success": False,
+            "error": "Image Classifier가 초기화되지 않았습니다."
+        }
+    try:
+        # 이미지 디코딩
+        image_data = data.get("image")
+        if not image_data:
+            return {
+                "success": False,
+                "error": "이미지 데이터가 제공되지 않았습니다."
+            }
+        image = decode_image(image_data)
+        # PIL Image를 numpy array로 변환
+        image_array = np.array(image)
+        # RGB 형식으로 변환
+        if len(image_array.shape) == 3 and image_array.shape[2] == 4:
+            image_array = image_array[:, :, :3]
+        # MediaPipe 형식으로 변환
+        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image_array)
+        # 이미지 분류
+        classification_result = image_classifier.classify(mp_image)
+        # 결과 추출
+        if not classification_result.classifications:
+            return {
+                "success": True,
+                "categories": None
+            }
+        categories = []
+        for category in classification_result.classifications[0].categories:
+            categories.append({
+                "category_name": category.category_name,
+                "score": float(category.score)
+            })
+        return {
+            "success": True,
+            "categories": categories
+        }
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        return {
+            "success": False,
+            "error": str(e)
+        }
+def is_person_handler(data: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    사람 감지 핸들러
+    Args:
+        data: {
+            "image": base64 인코딩된 이미지 문자열,
+            "threshold": float (기본값: 0.3)
+        }
+    Returns:
+        {
+            "success": bool,
+            "is_person": bool,
+            "error": str (실패 시)
+        }
+    """
+    global image_classifier
+    if image_classifier is None:
+        return {
+            "success": False,
+            "error": "Image Classifier가 초기화되지 않았습니다."
+        }
+    try:
+        # 이미지 분류 먼저 수행
+        classify_result = classify_image_handler(data)
+        if not classify_result.get("success"):
+            return classify_result
+        categories = classify_result.get("categories")
+        if not categories:
+            return {
+                "success": True,
+                "is_person": False
+            }
+        # threshold 가져오기
+        threshold = data.get("threshold", 0.3)
+        # 사람 관련 키워드
+        person_keywords = [
+            "person", "man", "woman", "girl", "boy", "child", "baby",
+            "people", "human", "bride", "groom", "bridegroom",
+            "lady", "gentleman", "adult", "teenager", "infant"
+        ]
+        # 동물 관련 키워드 (제외)
+        animal_keywords = [
+            "animal", "dog", "cat", "bear", "monkey", "ape", "gorilla",
+            "orangutan", "chimpanzee", "elephant", "lion", "tiger",
+            "bird", "fish", "horse", "cow", "pig", "sheep", "goat",
+            "rabbit", "mouse", "rat", "hamster", "squirrel", "deer",
+            "wolf", "fox", "panda", "koala", "kangaroo", "zebra",
+            "giraffe", "camel", "donkey", "mule", "llama", "alpaca"
+        ]
+        # 상위 결과 확인
+        for category in categories:
+            category_name_lower = category["category_name"].lower()
+            score = category["score"]
+            # 동물 관련 키워드가 포함되어 있으면 즉시 차단
+            if any(keyword in category_name_lower for keyword in animal_keywords):
+                return {
+                    "success": True,
+                    "is_person": False
+                }
+            # 사람 관련 키워드가 포함되어 있고 신뢰도가 임계값 이상이면 사람으로 판단
+            if any(keyword in category_name_lower for keyword in person_keywords):
+                if score >= threshold:
+                    return {
+                        "success": True,
+                        "is_person": True
+                    }
+        # 사람 관련 클래스가 없으면 차단
+        return {
+            "success": True,
+            "is_person": False
+        }
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        return {
+            "success": False,
+            "error": str(e)
+        }
+def handler(data: Dict[str, Any], context) -> Dict[str, Any]:
+    """
+    HuggingFace Inference Endpoint 메인 핸들러
+    Args:
+        data: 요청 데이터
+        context: 컨텍스트 객체
+    Returns:
+        응답 딕셔너리
+    """
+    # 모델 초기화 (최초 1회만)
+    global pose_landmarker, image_classifier
+    if pose_landmarker is None and image_classifier is None:
+        init_models()
+    # 엔드포인트 경로 확인
+    endpoint = data.get("endpoint", "")
+    try:
+        if endpoint == "/extract_landmarks":
+            return extract_landmarks_handler(data)
+        elif endpoint == "/classify_image":
+            return classify_image_handler(data)
+        elif endpoint == "/is_person":
+            return is_person_handler(data)
+        else:
+            return {
+                "success": False,
+                "error": f"알 수 없는 엔드포인트: {endpoint}. 지원되는 엔드포인트: /extract_landmarks, /classify_image, /is_person"
+            }
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        return {
+            "success": False,
+            "error": str(e)
+        }

mediapipe-endpoint/model/efficientnet_lite0.tflite ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6c7ab0a6e5dcbf38a8c33b960996a55a3b4300b36a018c4545801de3a3c8bde0
+size 18582189

mediapipe-endpoint/model/pose_landmarker_lite.task ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:59929e1d1ee95287735ddd833b19cf4ac46d29bc7afddbbf6753c459690d574a
+size 5777746

mediapipe-endpoint/requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+# MediaPipe 모델 실행을 위한 필수 의존성
+mediapipe>=0.10.0
+pillow>=10.0.0
+numpy>=1.24.0