justin-onda committed on
Commit
0812af4
·
verified ·
1 Parent(s): 9eee414

Upload 4 files

Browse files
image_classifier_model_0.2.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12651f636f6b372c3c5d7eb737e98e6db4a59b435686ac8606882fe0b56b455e
3
+ size 1213423807
image_classifier_model_0.2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abc9a06f89f13091371eb65cb6ad5e75cafd920fd0354dfd757a4dc9deac437a
3
+ size 1213357767
image_classifier_model_0.2_inference_example.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ ONNX 모델을 사용한 멀티헤드 이미지 분류 추론 예제
4
+ """
5
+
6
+ import onnxruntime as ort
7
+ import numpy as np
8
+ from PIL import Image
9
+ import torchvision.transforms as transforms
10
+ import json
11
+
12
+ # 전처리 파이프라인
13
# Preprocessing applied to every input image: resize to 224x224, centre-crop
# to 224, convert to a tensor, then normalize each channel with the mean/std
# values listed below.
_preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform = transforms.Compose(_preprocessing_steps)
20
+
21
def load_model_info(model_info_path):
    """Read the model-information JSON file and return its parsed content.

    Args:
        model_info_path: Path of the UTF-8 encoded JSON file.

    Returns:
        The object produced by ``json.load`` for that file.
    """
    with open(model_info_path, encoding='utf-8') as info_file:
        model_info = json.load(info_file)
    return model_info
25
+
26
def preprocess_image(image_path):
    """Load an image file and convert it into a batched model input array.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The output of the module-level ``transform`` pipeline for the RGB
        version of the image, with a leading batch axis added, as a NumPy
        array.
    """
    # The context manager releases the underlying file as soon as the pixel
    # data has been converted, instead of leaving that to garbage collection.
    with Image.open(image_path) as source_image:
        rgb_image = source_image.convert('RGB')
    tensor = transform(rgb_image)
    return tensor.unsqueeze(0).numpy()  # add the batch dimension
31
+
32
def softmax(x):
    """Compute a numerically stable softmax.

    Inputs with two or more dimensions are normalized along axis 1, as
    before. A 1-D input is treated as a single vector of logits and is
    normalized along its only axis instead of raising an axis error.

    Args:
        x: Array-like of logits.

    Returns:
        ``numpy.ndarray`` with the same shape as ``x`` whose entries along
        the normalized axis sum to 1.
    """
    logits = np.asarray(x)
    class_axis = 1 if logits.ndim >= 2 else -1
    # Subtracting the per-row maximum keeps np.exp from overflowing without
    # changing the result.
    shifted = logits - np.max(logits, axis=class_axis, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=class_axis, keepdims=True)
36
+
37
def predict_image(onnx_model_path, model_info_path, image_path):
    """Run the ONNX classifier on one image and summarize every output.

    Args:
        onnx_model_path: Path of the ONNX model file.
        model_info_path: Path of the JSON file read by ``load_model_info``;
            it must provide ``output_specification.heads`` and
            ``class_mappings``.
        image_path: Path of the image to classify.

    Returns:
        Dict keyed by output name. Each classification head maps to a dict
        with ``predicted_class``, ``confidence`` and ``top3``; ``'features'``
        maps to a dict with ``embedding``, ``dimension`` and ``description``.
    """
    model_info = load_model_info(model_info_path)
    session = ort.InferenceSession(onnx_model_path)
    image_array = preprocess_image(image_path)

    # Ask the session for its input name instead of hard-coding it, so the
    # example keeps working if the exported graph names its input differently.
    input_name = session.get_inputs()[0].name
    raw_outputs = session.run(None, {input_name: image_array})

    # With no explicit output list, run() returns one array per declared
    # output, in declaration order, so the two sequences pair up.
    outputs_by_name = {
        meta.name: array
        for meta, array in zip(session.get_outputs(), raw_outputs)
    }

    head_names = list(model_info['output_specification']['heads'])
    expected_names = head_names + ['features']

    results = {}
    for position, output_name in enumerate(expected_names):
        # Prefer the output the model itself labels with this name; fall back
        # to the positional convention (heads first, then features) when the
        # graph uses other output names.
        if output_name in outputs_by_name:
            output = outputs_by_name[output_name]
        else:
            output = raw_outputs[position]

        if output_name == 'features':
            features = output[0]  # first item of the batch
            results[output_name] = {
                'embedding': features.tolist(),
                'dimension': len(features),
                'description': 'DINOv2 backbone features'
            }
        else:
            class_names = model_info['class_mappings'].get(output_name, {})
            results[output_name] = _summarize_head(output, class_names)

    return results


def _summarize_head(logits, class_names):
    """Turn one head's logits into the predicted class and the top-3 list."""
    probabilities = softmax(logits)[0]  # first item of the batch

    def label_for(index):
        # Mapping keys are strings; fall back to a generic label if missing.
        return class_names.get(str(index), f"Class_{index}")

    pred_idx = np.argmax(probabilities)

    # Indices of the three largest probabilities, highest first.
    top3_indices = np.argsort(probabilities)[-3:][::-1]
    top3_results = [
        {'class': label_for(idx), 'probability': float(probabilities[idx])}
        for idx in top3_indices
    ]

    return {
        'predicted_class': label_for(pred_idx),
        'confidence': float(probabilities[pred_idx]),
        'top3': top3_results
    }
95
+
96
+ # 사용 예시
97
if __name__ == "__main__":
    import sys

    # Defaults match the artifact names published alongside this example.
    # Each one can be overridden positionally on the command line:
    #   script.py [onnx_model] [model_info_json] [image]
    default_paths = [
        "image_classifier_model_0.2.onnx",
        "image_classifier_model_0.2_model_info.json",
        "test_image.jpg",
    ]
    cli_paths = sys.argv[1:4]
    onnx_path, model_info_path, image_path = (
        cli_paths + default_paths[len(cli_paths):]
    )

    try:
        results = predict_image(onnx_path, model_info_path, image_path)

        print(f"이미지 분류 결과: {image_path}")
        print("=" * 50)

        for output_name, result in results.items():
            print(f"\n{output_name.upper()}:")
            if output_name == 'features':
                print(f" 차원: {result['dimension']}")
                print(f" 설명: {result['description']}")
                print(f" 특징 벡터 (처음 10개): {result['embedding'][:10]}")
            else:
                print(f" 예측 클래스: {result['predicted_class']}")
                print(f" 신뢰도: {result['confidence']:.4f}")
                print(" Top 3:")
                for i, top_result in enumerate(result['top3'], 1):
                    print(f" {i}. {top_result['class']}: {top_result['probability']:.4f}")

    except Exception as e:
        # Top-level boundary of the example script: report the failure
        # instead of showing a traceback.
        print(f"추론 실패: {e}")
image_classifier_model_0.2_model_info.json ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_architecture": {
3
+ "backbone": "vit_large_patch14_dinov2.lvd142m",
4
+ "feature_dim": 1024,
5
+ "total_parameters": 303252502,
6
+ "trainable_parameters": 24598,
7
+ "freeze_backbone": true
8
+ },
9
+ "input_specification": {
10
+ "image_size": [
11
+ 224,
12
+ 224
13
+ ],
14
+ "channels": 3,
15
+ "pixel_range": [
16
+ 0.0,
17
+ 1.0
18
+ ],
19
+ "normalization": {
20
+ "mean": [
21
+ 0.485,
22
+ 0.456,
23
+ 0.406
24
+ ],
25
+ "std": [
26
+ 0.229,
27
+ 0.224,
28
+ 0.225
29
+ ],
30
+ "description": "ImageNet normalization for DINOv2"
31
+ },
32
+ "input_format": "RGB",
33
+ "tensor_layout": "NCHW"
34
+ },
35
+ "output_specification": {
36
+ "heads": {
37
+ "scene": {
38
+ "num_classes": 6,
39
+ "output_type": "logits",
40
+ "activation": "softmax",
41
+ "classes": [
42
+ 16000001,
43
+ 16000002,
44
+ 16000006,
45
+ 16000008,
46
+ 16000009,
47
+ 16000011
48
+ ]
49
+ },
50
+ "concept": {
51
+ "num_classes": 3,
52
+ "output_type": "logits",
53
+ "activation": "softmax",
54
+ "classes": [
55
+ 17000001,
56
+ 17000002,
57
+ 17000003
58
+ ]
59
+ },
60
+ "object": {
61
+ "num_classes": 13,
62
+ "output_type": "logits",
63
+ "activation": "softmax",
64
+ "classes": [
65
+ 18000001,
66
+ 18000002,
67
+ 18000004,
68
+ 18000005,
69
+ 18000006,
70
+ 18000007,
71
+ 18000008,
72
+ 18000009,
73
+ 18000010,
74
+ 18000012,
75
+ 18000014,
76
+ 18000016,
77
+ "unclassified"
78
+ ]
79
+ }
80
+ },
81
+ "features": {
82
+ "feature_dim": 1024,
83
+ "output_type": "embedding",
84
+ "description": "DINOv2 backbone features after processing",
85
+ "shape": "[batch_size, 1024]"
86
+ }
87
+ },
88
+ "class_mappings": {
89
+ "scene": {
90
+ "0": 16000001,
91
+ "1": 16000002,
92
+ "2": 16000006,
93
+ "3": 16000008,
94
+ "4": 16000009,
95
+ "5": 16000011
96
+ },
97
+ "concept": {
98
+ "0": 17000001,
99
+ "1": 17000002,
100
+ "2": 17000003
101
+ },
102
+ "object": {
103
+ "0": 18000001,
104
+ "1": 18000002,
105
+ "2": 18000004,
106
+ "3": 18000005,
107
+ "4": 18000006,
108
+ "5": 18000007,
109
+ "6": 18000008,
110
+ "7": 18000009,
111
+ "8": 18000010,
112
+ "9": 18000012,
113
+ "10": 18000014,
114
+ "11": 18000016,
115
+ "12": "unclassified"
116
+ }
117
+ }
118
+ }