Halfotter committed on
Commit
2f1dcf0
Β·
verified Β·
1 Parent(s): ff282f7

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +71 -28
  2. requirements.txt +6 -5
  3. test_current_model.py +83 -0
  4. test_sintered_ore.py +78 -0
README.md CHANGED
@@ -1,3 +1,13 @@
 
 
 
 
 
 
 
 
 
 
1
  # Steel Industry Material Classification Model
2
 
3
  This model is trained to classify steel industry materials and products based on text descriptions. It uses a custom TF-IDF + Neural Network approach and can classify input text into 66 different steel-related categories.
@@ -25,37 +35,70 @@ The model can classify the following steel industry materials:
25
 
26
  ```python
27
  import torch
 
28
  import torch.nn.functional as F
29
- import pickle
30
  import joblib
31
- from sklearn.feature_extraction.text import TfidfVectorizer
32
-
33
- # Load model components
34
- with open('vectorizer.pkl', 'rb') as f:
35
- vectorizer = joblib.load(f)
36
-
37
- with open('model.pkl', 'rb') as f:
38
- model_data = pickle.load(f)
39
-
40
- model = model_data['model']
41
- id2label = model_data['id2label']
42
-
43
- # Prepare input
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  text = "철광석을 κ³ λ‘œμ—μ„œ ν™˜μ›ν•˜μ—¬ 선철을 μ œμ‘°ν•˜λŠ” κ³Όμ •"
45
- text_vector = vectorizer.transform([text]).toarray()
46
- text_tensor = torch.FloatTensor(text_vector)
47
-
48
- # Predict
49
- model.eval()
50
- with torch.no_grad():
51
- outputs = model(text_tensor)
52
- probabilities = F.softmax(outputs, dim=1)
53
- predicted_class = torch.argmax(probabilities, dim=1).item()
54
-
55
- # Get label
56
- label = id2label[str(predicted_class)]
57
- confidence = probabilities[0][predicted_class].item()
58
-
59
  print(f"Predicted: {label}")
60
  print(f"Confidence: {confidence:.4f}")
61
  ```
 
1
+ ---
2
+ language: ko
3
+ tags:
4
+ - text-classification
5
+ - steel-industry
6
+ - tf-idf
7
+ - neural-network
8
+ license: mit
9
+ ---
10
+
11
  # Steel Industry Material Classification Model
12
 
13
  This model is trained to classify steel industry materials and products based on text descriptions. It uses a custom TF-IDF + Neural Network approach and can classify input text into 66 different steel-related categories.
 
35
 
36
  ```python
37
  import torch
38
+ import torch.nn as nn
39
  import torch.nn.functional as F
 
40
  import joblib
41
+ import json
42
+ from huggingface_hub import hf_hub_download
43
+
44
+ # SimpleClassifier 클래슀 μ •μ˜
45
+ class SimpleClassifier(nn.Module):
46
+ def __init__(self, input_size, num_classes):
47
+ super(SimpleClassifier, self).__init__()
48
+ self.fc1 = nn.Linear(input_size, 256)
49
+ self.fc2 = nn.Linear(256, 128)
50
+ self.fc3 = nn.Linear(128, num_classes)
51
+ self.dropout = nn.Dropout(0.3)
52
+
53
+ def forward(self, x):
54
+ x = F.relu(self.fc1(x))
55
+ x = self.dropout(x)
56
+ x = F.relu(self.fc2(x))
57
+ x = self.dropout(x)
58
+ x = self.fc3(x)
59
+ return x
60
+
61
+ # λͺ¨λΈ νŒŒμΌλ“€ λ‹€μš΄λ‘œλ“œ
62
+ model_path = hf_hub_download(repo_id="Halfotter/flud", filename="pytorch_model.bin")
63
+ vectorizer_path = hf_hub_download(repo_id="Halfotter/flud", filename="vectorizer.pkl")
64
+ config_path = hf_hub_download(repo_id="Halfotter/flud", filename="config.json")
65
+
66
+ # μ„€μ • λ‘œλ“œ
67
+ with open(config_path, 'r', encoding='utf-8') as f:
68
+ config = json.load(f)
69
+
70
+ id2label = config.get('id2label', {})
71
+
72
+ # λͺ¨λΈ λ‘œλ“œ
73
+ input_size = 3000 # TF-IDF νŠΉμ„± 수
74
+ num_classes = len(id2label)
75
+ model = SimpleClassifier(input_size, num_classes)
76
+ model.load_state_dict(torch.load(model_path, map_location='cpu'))
77
+
78
+ # 벑터라이저 λ‘œλ“œ
79
+ vectorizer = joblib.load(vectorizer_path)
80
+
81
+ # 예츑 ν•¨μˆ˜
82
+ def predict(text):
83
+ model.eval()
84
+
85
+ # TF-IDF 벑터화
86
+ text_vector = vectorizer.transform([text]).toarray()
87
+ text_tensor = torch.FloatTensor(text_vector)
88
+
89
+ with torch.no_grad():
90
+ outputs = model(text_tensor)
91
+ probabilities = F.softmax(outputs, dim=1)
92
+ predicted_class = torch.argmax(probabilities, dim=1).item()
93
+
94
+ label = id2label[str(predicted_class)]
95
+ confidence = probabilities[0][predicted_class].item()
96
+
97
+ return label, confidence
98
+
99
+ # μ‚¬μš© μ˜ˆμ‹œ
100
  text = "철광석을 κ³ λ‘œμ—μ„œ ν™˜μ›ν•˜μ—¬ 선철을 μ œμ‘°ν•˜λŠ” κ³Όμ •"
101
+ label, confidence = predict(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  print(f"Predicted: {label}")
103
  print(f"Confidence: {confidence:.4f}")
104
  ```
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
- torch>=1.9.0
2
- scikit-learn>=1.0.0
3
- numpy>=1.21.0
4
- pandas>=1.3.0
5
- joblib>=1.1.0
 
 
1
+ torch==2.8.0
2
+ scikit-learn==1.7.1
3
+ joblib==1.5.2
4
+ numpy==2.2.6
5
+ pandas==2.3.2
6
+ huggingface-hub>=0.19.0
test_current_model.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ import pickle
5
+ import joblib
6
+ import numpy as np
7
+
8
+ # SimpleClassifier 클래슀 μ •μ˜
9
class SimpleClassifier(nn.Module):
    """Three-layer MLP classifier over a dense TF-IDF feature vector.

    Architecture: input_size -> 256 -> 128 -> num_classes, ReLU after each
    hidden layer and dropout (p=0.3) applied to both hidden activations.
    Returns raw logits (no softmax).

    NOTE: attribute names (fc1/fc2/fc3/dropout) are part of the saved
    state_dict and must not be renamed.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        hidden = self.dropout(F.relu(self.fc1(x)))
        hidden = self.dropout(F.relu(self.fc2(hidden)))
        return self.fc3(hidden)
24
+
25
def test_current_model():
    """Smoke-test the locally exported model.

    Loads ``config.json`` (for the id -> label mapping), the trained
    weights from ``pytorch_model.bin`` and the fitted TF-IDF vectorizer
    from ``vectorizer.pkl`` in the current directory, then prints the
    top-5 predicted labels for a handful of probe words.

    Returns:
        None. Any failure is caught, printed with a traceback and
        swallowed — this is a best-effort diagnostic script.
    """
    # Hoisted to the top of the function; the original placed this
    # import inside the `with open(...)` block.
    import json

    print("=== ν˜„μž¬ λͺ¨λΈ ν…ŒμŠ€νŠΈ ===")

    try:
        # Load the id -> label mapping from the exported config.
        with open('config.json', 'r', encoding='utf-8') as f:
            config = json.load(f)

        id2label = config.get('id2label', {})
        print(f"라벨 수: {len(id2label)}")

        # Rebuild the network and load the trained weights on CPU.
        input_size = 3000  # must match the TF-IDF feature count used at training time
        num_classes = len(id2label)
        model = SimpleClassifier(input_size, num_classes)
        model.load_state_dict(torch.load('pytorch_model.bin', map_location='cpu'))

        # Load the fitted TF-IDF vectorizer.
        vectorizer = joblib.load('vectorizer.pkl')

        model.eval()

        # Probe words (includes ν™˜μ›μ² ).
        test_words = ["μ² γ„Ή", "CaO", "ν•΄λ©΄μ² ", "λ“±λ₯˜", "ν™˜μ›μ² "]

        for word in test_words:
            print(f"\n{'='*50}")
            print(f"μž…λ ₯: '{word}'")
            print(f"{'='*50}")

            # TF-IDF vectorization -> dense float tensor of shape (1, input_size).
            word_vector = vectorizer.transform([word]).toarray()
            word_tensor = torch.FloatTensor(word_vector)

            with torch.no_grad():
                outputs = model(word_tensor)
                probabilities = F.softmax(outputs, dim=1)

            # Top-5 predictions by probability.
            top_probs, top_indices = torch.topk(probabilities, 5, dim=1)

            print(f"μ΅œλŒ€ ν™•λ₯ : {probabilities.max().item():.4f} ({probabilities.max().item()*100:.1f}%)")
            print(f"μƒμœ„ 5개 예츑:")

            for i in range(5):
                label_id = top_indices[0][i].item()
                probability = top_probs[0][i].item()
                # config.json stores id2label keyed by string ids.
                label = id2label.get(str(label_id), f"Unknown_{label_id}")
                print(f"  {i+1}. {label}: {probability:.4f} ({probability*100:.1f}%)")

    except Exception as e:
        # Deliberate broad catch: standalone diagnostic script — report
        # the failure with a traceback instead of crashing.
        print(f"μ—λŸ¬ λ°œμƒ: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    test_current_model()
test_sintered_ore.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ import pickle
5
+ import joblib
6
+ import numpy as np
7
+
8
+ # SimpleClassifier 클래슀 μ •μ˜
9
+ class SimpleClassifier(nn.Module):
10
+ def __init__(self, input_size, num_classes):
11
+ super(SimpleClassifier, self).__init__()
12
+ self.fc1 = nn.Linear(input_size, 256)
13
+ self.fc2 = nn.Linear(256, 128)
14
+ self.fc3 = nn.Linear(128, num_classes)
15
+ self.dropout = nn.Dropout(0.3)
16
+
17
+ def forward(self, x):
18
+ x = F.relu(self.fc1(x))
19
+ x = self.dropout(x)
20
+ x = F.relu(self.fc2(x))
21
+ x = self.dropout(x)
22
+ x = self.fc3(x)
23
+ return x
24
+
25
def test_sintered_ore():
    """Smoke-test the exported model on the single word "μ†Œκ²°κ΄‘" (sintered ore).

    Loads ``config.json``, ``pytorch_model.bin`` and ``vectorizer.pkl``
    from the current directory and prints the top-10 predicted labels.

    Returns:
        None. Any failure is caught, printed with a traceback and
        swallowed — this is a best-effort diagnostic script.
    """
    # Hoisted to the top of the function; the original placed this
    # import inside the `with open(...)` block.
    import json

    print("=== μ†Œκ²°κ΄‘ ν…ŒμŠ€νŠΈ ===")

    try:
        # Load the id -> label mapping from the exported config.
        with open('config.json', 'r', encoding='utf-8') as f:
            config = json.load(f)

        id2label = config.get('id2label', {})

        # Rebuild the network and load the trained weights on CPU.
        input_size = 3000  # must match the TF-IDF feature count used at training time
        num_classes = len(id2label)
        model = SimpleClassifier(input_size, num_classes)
        model.load_state_dict(torch.load('pytorch_model.bin', map_location='cpu'))

        # Load the fitted TF-IDF vectorizer.
        vectorizer = joblib.load('vectorizer.pkl')

        model.eval()

        # Single probe word: sintered ore.
        test_word = "μ†Œκ²°κ΄‘"
        print(f"μž…λ ₯: '{test_word}'")

        # TF-IDF vectorization -> dense float tensor of shape (1, input_size).
        word_vector = vectorizer.transform([test_word]).toarray()
        word_tensor = torch.FloatTensor(word_vector)

        with torch.no_grad():
            outputs = model(word_tensor)
            probabilities = F.softmax(outputs, dim=1)

        # Top-10 predictions by probability.
        top_probs, top_indices = torch.topk(probabilities, 10, dim=1)

        print(f"μ΅œλŒ€ ν™•λ₯ : {probabilities.max().item():.4f} ({probabilities.max().item()*100:.1f}%)")
        print(f"μƒμœ„ 10개 예츑:")

        for i in range(10):
            label_id = top_indices[0][i].item()
            probability = top_probs[0][i].item()
            # config.json stores id2label keyed by string ids.
            label = id2label.get(str(label_id), f"Unknown_{label_id}")
            print(f"  {i+1}. {label}: {probability:.4f} ({probability*100:.1f}%)")

    except Exception as e:
        # Deliberate broad catch: standalone diagnostic script — report
        # the failure with a traceback instead of crashing.
        print(f"μ—λŸ¬ λ°œμƒ: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    test_sintered_ore()