Upload 4 files

Browse files

Files changed (4) hide show

image_classifier_model_0.2.onnx +3 -0
image_classifier_model_0.2.pth +3 -0
image_classifier_model_0.2_inference_example.py +123 -0
image_classifier_model_0.2_model_info.json +118 -0

image_classifier_model_0.2.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:12651f636f6b372c3c5d7eb737e98e6db4a59b435686ac8606882fe0b56b455e
+size 1213423807

image_classifier_model_0.2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:abc9a06f89f13091371eb65cb6ad5e75cafd920fd0354dfd757a4dc9deac437a
+size 1213357767

image_classifier_model_0.2_inference_example.py ADDED Viewed

	@@ -0,0 +1,123 @@

+#!/usr/bin/env python3
+"""
+ONNX 모델을 사용한 멀티헤드 이미지 분류 추론 예제
+"""
+import onnxruntime as ort
+import numpy as np
+from PIL import Image
+import torchvision.transforms as transforms
+import json
+# 전처리 파이프라인
+transform = transforms.Compose([
+    transforms.Resize((224, 224)),
+    transforms.CenterCrop(224),
+    transforms.ToTensor(),
+    transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                        std=[0.229, 0.224, 0.225])
+])
+def load_model_info(model_info_path):
+    """모델 정보 로드"""
+    with open(model_info_path, 'r', encoding='utf-8') as f:
+        return json.load(f)
+def preprocess_image(image_path):
+    """이미지 전처리"""
+    image = Image.open(image_path).convert('RGB')
+    tensor = transform(image)
+    return tensor.unsqueeze(0).numpy()  # 배치 차원 추가
+def softmax(x):
+    """Softmax 함수"""
+    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
+    return exp_x / np.sum(exp_x, axis=1, keepdims=True)
+def predict_image(onnx_model_path, model_info_path, image_path):
+    """이미지 분류 예측"""
+    # 모델 정보 로드
+    model_info = load_model_info(model_info_path)
+    # ONNX 세션 생성
+    session = ort.InferenceSession(onnx_model_path)
+    # 이미지 전처리
+    image_array = preprocess_image(image_path)
+    # 추론 실행
+    inputs = {'image': image_array}
+    outputs = session.run(None, inputs)
+    # 결과 해석
+    results = {}
+    head_names = list(model_info['output_specification']['heads'].keys())
+    output_names = head_names + ['features']  # features 추가
+    for i, output_name in enumerate(output_names):
+        if output_name == 'features':
+            # 특징 벡터 처리
+            features = outputs[i][0]  # 첫 번째 배치
+            results[output_name] = {
+                'embedding': features.tolist(),
+                'dimension': len(features),
+                'description': 'DINOv2 backbone features'
+            }
+        else:
+            # 분류 헤드 처리
+            logits = outputs[i]
+            probabilities = softmax(logits)[0]  # 첫 번째 배치
+            # 클래스 이름 매핑
+            class_names = model_info['class_mappings'].get(output_name, {})
+            # 최고 확률 클래스
+            pred_idx = np.argmax(probabilities)
+            pred_class = class_names.get(str(pred_idx), f"Class_{pred_idx}")
+            pred_prob = probabilities[pred_idx]
+            # 상위 3개 클래스
+            top3_indices = np.argsort(probabilities)[-3:][::-1]
+            top3_results = []
+            for idx in top3_indices:
+                class_name = class_names.get(str(idx), f"Class_{idx}")
+                prob = probabilities[idx]
+                top3_results.append({'class': class_name, 'probability': float(prob)})
+            results[output_name] = {
+                'predicted_class': pred_class,
+                'confidence': float(pred_prob),
+                'top3': top3_results
+            }
+    return results
+# 사용 예시
+if __name__ == "__main__":
+    onnx_path = "image_classifier.onnx"
+    model_info_path = "model_info.json"
+    image_path = "test_image.jpg"
+    try:
+        results = predict_image(onnx_path, model_info_path, image_path)
+        print(f"이미지 분류 결과: {image_path}")
+        print("=" * 50)
+        for output_name, result in results.items():
+            if output_name == 'features':
+                print(f"\n{output_name.upper()}:")
+                print(f"  차원: {result['dimension']}")
+                print(f"  설명: {result['description']}")
+                print(f"  특징 벡터 (처음 10개): {result['embedding'][:10]}")
+            else:
+                print(f"\n{output_name.upper()}:")
+                print(f"  예측 클래스: {result['predicted_class']}")
+                print(f"  신뢰도: {result['confidence']:.4f}")
+                print(f"  Top 3:")
+                for i, top_result in enumerate(result['top3'], 1):
+                    print(f"    {i}. {top_result['class']}: {top_result['probability']:.4f}")
+    except Exception as e:
+        print(f"추론 실패: {e}")

image_classifier_model_0.2_model_info.json ADDED Viewed

	@@ -0,0 +1,118 @@

+{
+  "model_architecture": {
+    "backbone": "vit_large_patch14_dinov2.lvd142m",
+    "feature_dim": 1024,
+    "total_parameters": 303252502,
+    "trainable_parameters": 24598,
+    "freeze_backbone": true
+  },
+  "input_specification": {
+    "image_size": [
+      224,
+      224
+    ],
+    "channels": 3,
+    "pixel_range": [
+      0.0,
+      1.0
+    ],
+    "normalization": {
+      "mean": [
+        0.485,
+        0.456,
+        0.406
+      ],
+      "std": [
+        0.229,
+        0.224,
+        0.225
+      ],
+      "description": "ImageNet normalization for DINOv2"
+    },
+    "input_format": "RGB",
+    "tensor_layout": "NCHW"
+  },
+  "output_specification": {
+    "heads": {
+      "scene": {
+        "num_classes": 6,
+        "output_type": "logits",
+        "activation": "softmax",
+        "classes": [
+          16000001,
+          16000002,
+          16000006,
+          16000008,
+          16000009,
+          16000011
+        ]
+      },
+      "concept": {
+        "num_classes": 3,
+        "output_type": "logits",
+        "activation": "softmax",
+        "classes": [
+          17000001,
+          17000002,
+          17000003
+        ]
+      },
+      "object": {
+        "num_classes": 13,
+        "output_type": "logits",
+        "activation": "softmax",
+        "classes": [
+          18000001,
+          18000002,
+          18000004,
+          18000005,
+          18000006,
+          18000007,
+          18000008,
+          18000009,
+          18000010,
+          18000012,
+          18000014,
+          18000016,
+          "unclassified"
+        ]
+      }
+    },
+    "features": {
+      "feature_dim": 1024,
+      "output_type": "embedding",
+      "description": "DINOv2 backbone features after processing",
+      "shape": "[batch_size, 1024]"
+    }
+  },
+  "class_mappings": {
+    "scene": {
+      "0": 16000001,
+      "1": 16000002,
+      "2": 16000006,
+      "3": 16000008,
+      "4": 16000009,
+      "5": 16000011
+    },
+    "concept": {
+      "0": 17000001,
+      "1": 17000002,
+      "2": 17000003
+    },
+    "object": {
+      "0": 18000001,
+      "1": 18000002,
+      "2": 18000004,
+      "3": 18000005,
+      "4": 18000006,
+      "5": 18000007,
+      "6": 18000008,
+      "7": 18000009,
+      "8": 18000010,
+      "9": 18000012,
+      "10": 18000014,
+      "11": 18000016,
+      "12": "unclassified"
+    }
+  }
+}