Halfotter committed on
Commit
79deb35
·
verified ·
1 Parent(s): 5bfc192

Upload inference.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. inference.py +78 -140
inference.py CHANGED
@@ -1,157 +1,95 @@
1
  import torch
2
- import numpy as np
3
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
- import pickle
5
  import json
 
6
  import os
7
 
8
- class SteelMaterialClassifier:
9
- def __init__(self, model_path):
10
- """
11
- Initialize the steel material classifier
12
-
13
- Args:
14
- model_path: Path to the model directory
15
- """
16
- self.model_path = model_path
17
- self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
18
 
19
- # Load model and tokenizer
20
- self.tokenizer = AutoTokenizer.from_pretrained(model_path)
21
- self.model = AutoModelForSequenceClassification.from_pretrained(model_path)
22
- self.model.to(self.device)
23
- self.model.eval()
24
 
25
- # Load additional components
26
- self._load_additional_components()
 
 
 
27
 
28
- def _load_additional_components(self):
29
- """Load classifier and label embeddings if they exist"""
30
- try:
31
- # Load classifier if exists
32
- classifier_path = os.path.join(self.model_path, "classifier.pkl")
33
- if os.path.exists(classifier_path):
34
- with open(classifier_path, 'rb') as f:
35
- self.classifier = pickle.load(f)
36
- else:
37
- self.classifier = None
38
-
39
- # Load label embeddings if exists
40
- embeddings_path = os.path.join(self.model_path, "label_embeddings.pkl")
41
- if os.path.exists(embeddings_path):
42
- with open(embeddings_path, 'rb') as f:
43
- self.label_embeddings = pickle.load(f)
44
- else:
45
- self.label_embeddings = None
46
-
47
- except Exception as e:
48
- print(f"Warning: Could not load additional components: {e}")
49
- self.classifier = None
50
- self.label_embeddings = None
51
 
52
- def predict(self, text, top_k=5):
53
- """
54
- Predict steel material classification
 
55
 
56
- Args:
57
- text: Input text to classify
58
- top_k: Number of top predictions to return
59
-
60
- Returns:
61
- dict: Prediction results with labels and probabilities
62
- """
63
- # Tokenize input
64
- inputs = self.tokenizer(
65
- text,
66
- return_tensors="pt",
67
- truncation=True,
68
- max_length=512,
69
- padding=True
70
- )
71
- inputs = {k: v.to(self.device) for k, v in inputs.items()}
72
-
73
- # Get model predictions
74
- with torch.no_grad():
75
- outputs = self.model(**inputs)
76
- logits = outputs.logits
77
- probabilities = torch.nn.functional.softmax(logits, dim=-1)
78
 
79
- # Get top-k predictions
80
- top_probs, top_indices = torch.topk(probabilities, top_k, dim=1)
81
 
82
- # Convert to results
83
- results = []
84
- for i in range(top_k):
85
- label_id = top_indices[0][i].item()
86
- probability = top_probs[0][i].item()
87
- label = self.model.config.id2label[label_id]
88
-
89
- results.append({
90
- "label": label,
91
- "label_id": label_id,
92
- "probability": probability
93
- })
94
-
95
- return {
96
- "predictions": results,
97
- "input_text": text,
98
- "model_info": {
99
- "model_name": self.model.config._name_or_path,
100
- "num_labels": self.model.config.num_labels,
101
- "device": str(self.device)
102
- }
103
- }
104
 
105
- def predict_batch(self, texts, top_k=5):
106
- """
107
- Predict for multiple texts
108
-
109
- Args:
110
- texts: List of input texts
111
- top_k: Number of top predictions to return
 
 
112
 
113
- Returns:
114
- list: List of prediction results
115
- """
116
- results = []
117
- for text in texts:
118
- result = self.predict(text, top_k)
119
- results.append(result)
120
- return results
121
 
122
- def get_label_info(self):
123
- """
124
- Get information about all available labels
125
-
126
- Returns:
127
- dict: Label information
128
- """
129
- return {
130
- "num_labels": self.model.config.num_labels,
131
- "id2label": self.model.config.id2label,
132
- "label2id": self.model.config.label2id
133
- }
134
 
135
- # Example usage
136
- if __name__ == "__main__":
137
- # Initialize classifier
138
- model_path = "." # Current directory
139
- classifier = SteelMaterialClassifier(model_path)
140
 
141
- # Example predictions
142
- test_texts = [
143
- "์ฒ ๊ด‘์„์„ ๊ณ ๋กœ์—์„œ ํ™˜์›ํ•˜์—ฌ ์„ ์ฒ ์„ ์ œ์กฐํ•˜๋Š” ๊ณผ์ •",
144
- "์ฒœ์—ฐ๊ฐ€์Šค๋ฅผ ์—ฐ๋ฃŒ๋กœ ์‚ฌ์šฉํ•˜์—ฌ ๊ณ ๋กœ๋ฅผ ๊ฐ€์—ด",
145
- "์„ํšŒ์„์„ ์ฒจ๊ฐ€ํ•˜์—ฌ ์Šฌ๋ž˜๊ทธ๋ฅผ ํ˜•์„ฑ"
146
- ]
147
 
148
- print("=== Steel Material Classification Results ===")
149
- for text in test_texts:
150
- result = classifier.predict(text)
151
- print(f"\nInput: {text}")
152
- print(f"Top prediction: {result['predictions'][0]['label']} ({result['predictions'][0]['probability']:.4f})")
153
-
154
- # Show top 3 predictions
155
- print("Top 3 predictions:")
156
- for i, pred in enumerate(result['predictions'][:3]):
157
- print(f" {i+1}. {pred['label']}: {pred['probability']:.4f}")
 
 
1
  import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
 
4
  import json
5
+ import numpy as np
6
  import os
7
 
8
class IntegratedClassifier(nn.Module):
    """Feed-forward text classifier with a built-in bag-of-words vectorizer.

    A 3-layer MLP over a fixed-size vector produced from raw text by
    ``_vectorize_text``. ``config`` must provide ``input_size``,
    ``hidden_size``, ``intermediate_size``, ``num_labels`` and ``id2label``
    (class index -> label name; JSON-loaded configs use string keys).
    """

    def __init__(self, config):
        super().__init__()
        # Layer creation order is deliberate: it fixes both the state_dict
        # key names and RNG consumption during random initialization.
        self.fc1 = nn.Linear(config['input_size'], config['hidden_size'])
        self.fc2 = nn.Linear(config['hidden_size'], config['intermediate_size'])
        self.fc3 = nn.Linear(config['intermediate_size'], config['num_labels'])
        self.dropout = nn.Dropout(0.3)
        self.id2label = config['id2label']
        self.input_size = config['input_size']
        # Hoisted loop invariant: the decimal string of every vector index,
        # computed once instead of once per (word, index) pair below.
        self._index_strs = [str(i) for i in range(self.input_size)]

    def forward(self, text):
        """Return (1, num_labels) logits for a raw text string.

        Vectorization happens internally — no external tokenizer is used.
        """
        text_vector = self._vectorize_text(text)
        text_tensor = torch.FloatTensor(text_vector).unsqueeze(0)

        x = F.relu(self.fc1(text_tensor))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)

        return x

    def _vectorize_text(self, text):
        """Map text to an L1-normalized count vector of length ``input_size``.

        NOTE(review): despite the original comment this is NOT TF-IDF — a
        word increments slot ``i`` when the word and the decimal string of
        ``i`` contain each other as substrings. Kept byte-for-byte in
        behavior because the trained weights depend on this featurization.
        """
        words = text.lower().split()
        vector = np.zeros(self.input_size)

        for word in words:
            for i, idx_str in enumerate(self._index_strs):
                if word in idx_str or idx_str in word:
                    vector[i] += 1

        # Normalize so document length does not dominate; all-zero vectors
        # are passed through unchanged to avoid division by zero.
        if np.sum(vector) > 0:
            vector = vector / np.sum(vector)

        return vector

    def predict(self, text):
        """Classify ``text``; return (label, confidence) for the top class."""
        self.eval()
        with torch.no_grad():
            outputs = self.forward(text)
            probabilities = F.softmax(outputs, dim=1)
            predicted_class = torch.argmax(probabilities, dim=1).item()

        # JSON-loaded configs key id2label by string; fall back to int keys
        # for configs constructed directly in Python.
        key = str(predicted_class)
        if key in self.id2label:
            label = self.id2label[key]
        else:
            label = self.id2label[predicted_class]
        confidence = probabilities[0][predicted_class].item()

        return label, confidence
57
+
58
# Module-level cache so the model is loaded only once per process.
model = None

def load_model():
    """Load the classifier from ``config.json`` + ``integrated_model.bin``.

    Returns the loaded ``IntegratedClassifier`` in eval mode and stores it
    in the module-level ``model`` cache.

    NOTE(review): both paths are resolved against the current working
    directory, so the script must be run from the model directory —
    confirm this is intended before packaging.
    """
    global model

    # Load the architecture/label configuration.
    config_path = os.path.join(os.getcwd(), "config.json")
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)

    # Build the model and restore trained weights.
    model = IntegratedClassifier(config)
    model_path = os.path.join(os.getcwd(), "integrated_model.bin")
    # weights_only=True: only tensor data is unpickled, preventing
    # arbitrary-code execution from a tampered checkpoint (a plain
    # state_dict needs nothing more). Requires torch >= 1.13.
    model.load_state_dict(
        torch.load(model_path, map_location='cpu', weights_only=True)
    )
    model.eval()

    return model
77
 
78
def predict(text):
    """Classify a single text, lazily loading the model on first call.

    Returns a dict carrying the predicted label, its confidence, and the
    original input text.
    """
    global model

    # First call in this process: populate the module-level cache.
    if model is None:
        model = load_model()

    label, confidence = model.predict(text)

    result = {
        "label": label,
        "confidence": confidence,
        "text": text,
    }
    return result
92
+
93
+ # ๋ชจ๋ธ ์ดˆ๊ธฐ ๋กœ๋“œ
94
+ if __name__ == "__main__":
95
+ load_model()