Halfotter committed on
Commit
79deb35
·
verified ·
1 Parent(s): 5bfc192

Upload inference.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. inference.py +78 -140
inference.py CHANGED
@@ -1,157 +1,95 @@
1
  import torch
2
- import numpy as np
3
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
- import pickle
5
  import json
 
6
  import os
7
 
8
- class SteelMaterialClassifier:
9
- def __init__(self, model_path):
10
- """
11
- Initialize the steel material classifier
12
-
13
- Args:
14
- model_path: Path to the model directory
15
- """
16
- self.model_path = model_path
17
- self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
18
 
19
- # Load model and tokenizer
20
- self.tokenizer = AutoTokenizer.from_pretrained(model_path)
21
- self.model = AutoModelForSequenceClassification.from_pretrained(model_path)
22
- self.model.to(self.device)
23
- self.model.eval()
24
 
25
- # Load additional components
26
- self._load_additional_components()
 
 
 
27
 
28
- def _load_additional_components(self):
29
- """Load classifier and label embeddings if they exist"""
30
- try:
31
- # Load classifier if exists
32
- classifier_path = os.path.join(self.model_path, "classifier.pkl")
33
- if os.path.exists(classifier_path):
34
- with open(classifier_path, 'rb') as f:
35
- self.classifier = pickle.load(f)
36
- else:
37
- self.classifier = None
38
-
39
- # Load label embeddings if exists
40
- embeddings_path = os.path.join(self.model_path, "label_embeddings.pkl")
41
- if os.path.exists(embeddings_path):
42
- with open(embeddings_path, 'rb') as f:
43
- self.label_embeddings = pickle.load(f)
44
- else:
45
- self.label_embeddings = None
46
-
47
- except Exception as e:
48
- print(f"Warning: Could not load additional components: {e}")
49
- self.classifier = None
50
- self.label_embeddings = None
51
 
52
- def predict(self, text, top_k=5):
53
- """
54
- Predict steel material classification
 
55
 
56
- Args:
57
- text: Input text to classify
58
- top_k: Number of top predictions to return
59
-
60
- Returns:
61
- dict: Prediction results with labels and probabilities
62
- """
63
- # Tokenize input
64
- inputs = self.tokenizer(
65
- text,
66
- return_tensors="pt",
67
- truncation=True,
68
- max_length=512,
69
- padding=True
70
- )
71
- inputs = {k: v.to(self.device) for k, v in inputs.items()}
72
-
73
- # Get model predictions
74
- with torch.no_grad():
75
- outputs = self.model(**inputs)
76
- logits = outputs.logits
77
- probabilities = torch.nn.functional.softmax(logits, dim=-1)
78
 
79
- # Get top-k predictions
80
- top_probs, top_indices = torch.topk(probabilities, top_k, dim=1)
81
 
82
- # Convert to results
83
- results = []
84
- for i in range(top_k):
85
- label_id = top_indices[0][i].item()
86
- probability = top_probs[0][i].item()
87
- label = self.model.config.id2label[label_id]
88
-
89
- results.append({
90
- "label": label,
91
- "label_id": label_id,
92
- "probability": probability
93
- })
94
-
95
- return {
96
- "predictions": results,
97
- "input_text": text,
98
- "model_info": {
99
- "model_name": self.model.config._name_or_path,
100
- "num_labels": self.model.config.num_labels,
101
- "device": str(self.device)
102
- }
103
- }
104
 
105
- def predict_batch(self, texts, top_k=5):
106
- """
107
- Predict for multiple texts
108
-
109
- Args:
110
- texts: List of input texts
111
- top_k: Number of top predictions to return
 
 
112
 
113
- Returns:
114
- list: List of prediction results
115
- """
116
- results = []
117
- for text in texts:
118
- result = self.predict(text, top_k)
119
- results.append(result)
120
- return results
121
 
122
- def get_label_info(self):
123
- """
124
- Get information about all available labels
125
-
126
- Returns:
127
- dict: Label information
128
- """
129
- return {
130
- "num_labels": self.model.config.num_labels,
131
- "id2label": self.model.config.id2label,
132
- "label2id": self.model.config.label2id
133
- }
134
 
135
- # Example usage
136
- if __name__ == "__main__":
137
- # Initialize classifier
138
- model_path = "." # Current directory
139
- classifier = SteelMaterialClassifier(model_path)
140
 
141
- # Example predictions
142
- test_texts = [
143
- "์ฒ ๊ด‘์„์„ ๊ณ ๋กœ์—์„œ ํ™˜์›ํ•˜์—ฌ ์„ ์ฒ ์„ ์ œ์กฐํ•˜๋Š” ๊ณผ์ •",
144
- "์ฒœ์—ฐ๊ฐ€์Šค๋ฅผ ์—ฐ๋ฃŒ๋กœ ์‚ฌ์šฉํ•˜์—ฌ ๊ณ ๋กœ๋ฅผ ๊ฐ€์—ด",
145
- "์„ํšŒ์„์„ ์ฒจ๊ฐ€ํ•˜์—ฌ ์Šฌ๋ž˜๊ทธ๋ฅผ ํ˜•์„ฑ"
146
- ]
147
 
148
- print("=== Steel Material Classification Results ===")
149
- for text in test_texts:
150
- result = classifier.predict(text)
151
- print(f"\nInput: {text}")
152
- print(f"Top prediction: {result['predictions'][0]['label']} ({result['predictions'][0]['probability']:.4f})")
153
-
154
- # Show top 3 predictions
155
- print("Top 3 predictions:")
156
- for i, pred in enumerate(result['predictions'][:3]):
157
- print(f" {i+1}. {pred['label']}: {pred['probability']:.4f}")
 
 
1
  import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
 
4
  import json
5
+ import numpy as np
6
  import os
7
 
8
class IntegratedClassifier(nn.Module):
    """Feed-forward text classifier with a built-in bag-of-words vectorizer.

    A 3-layer MLP over a fixed-size vector produced from raw text by
    ``_vectorize_text``. ``config`` must provide ``input_size``,
    ``hidden_size``, ``intermediate_size``, ``num_labels`` and ``id2label``
    (class index -> label name; JSON-loaded configs use string keys).
    """

    def __init__(self, config):
        super().__init__()
        # Layer creation order is deliberate: it fixes both the state_dict
        # key names and RNG consumption during random initialization.
        self.fc1 = nn.Linear(config['input_size'], config['hidden_size'])
        self.fc2 = nn.Linear(config['hidden_size'], config['intermediate_size'])
        self.fc3 = nn.Linear(config['intermediate_size'], config['num_labels'])
        self.dropout = nn.Dropout(0.3)
        self.id2label = config['id2label']
        self.input_size = config['input_size']
        # Hoisted loop invariant: the decimal string of every vector index,
        # computed once instead of once per (word, index) pair below.
        self._index_strs = [str(i) for i in range(self.input_size)]

    def forward(self, text):
        """Return (1, num_labels) logits for a raw text string.

        Vectorization happens internally — no external tokenizer is used.
        """
        text_vector = self._vectorize_text(text)
        text_tensor = torch.FloatTensor(text_vector).unsqueeze(0)

        x = F.relu(self.fc1(text_tensor))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)

        return x

    def _vectorize_text(self, text):
        """Map text to an L1-normalized count vector of length ``input_size``.

        NOTE(review): despite the original comment this is NOT TF-IDF — a
        word increments slot ``i`` when the word and the decimal string of
        ``i`` contain each other as substrings. Kept byte-for-byte in
        behavior because the trained weights depend on this featurization.
        """
        words = text.lower().split()
        vector = np.zeros(self.input_size)

        for word in words:
            for i, idx_str in enumerate(self._index_strs):
                if word in idx_str or idx_str in word:
                    vector[i] += 1

        # Normalize so document length does not dominate; all-zero vectors
        # are passed through unchanged to avoid division by zero.
        if np.sum(vector) > 0:
            vector = vector / np.sum(vector)

        return vector

    def predict(self, text):
        """Classify ``text``; return (label, confidence) for the top class."""
        self.eval()
        with torch.no_grad():
            outputs = self.forward(text)
            probabilities = F.softmax(outputs, dim=1)
            predicted_class = torch.argmax(probabilities, dim=1).item()

        # JSON-loaded configs key id2label by string; fall back to int keys
        # for configs constructed directly in Python.
        key = str(predicted_class)
        if key in self.id2label:
            label = self.id2label[key]
        else:
            label = self.id2label[predicted_class]
        confidence = probabilities[0][predicted_class].item()

        return label, confidence
57
+
58
# Module-level cache so the model is loaded only once per process.
model = None

def load_model():
    """Load the classifier from ``config.json`` + ``integrated_model.bin``.

    Returns the loaded ``IntegratedClassifier`` in eval mode and stores it
    in the module-level ``model`` cache.

    NOTE(review): both paths are resolved against the current working
    directory, so the script must be run from the model directory —
    confirm this is intended before packaging.
    """
    global model

    # Load the architecture/label configuration.
    config_path = os.path.join(os.getcwd(), "config.json")
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)

    # Build the model and restore trained weights.
    model = IntegratedClassifier(config)
    model_path = os.path.join(os.getcwd(), "integrated_model.bin")
    # weights_only=True: only tensor data is unpickled, preventing
    # arbitrary-code execution from a tampered checkpoint (a plain
    # state_dict needs nothing more). Requires torch >= 1.13.
    model.load_state_dict(
        torch.load(model_path, map_location='cpu', weights_only=True)
    )
    model.eval()

    return model
77
 
78
def predict(text):
    """Classify a single text, lazily loading the model on first call.

    Returns a dict carrying the predicted label, its confidence, and the
    original input text.
    """
    global model

    # First call in this process: populate the module-level cache.
    if model is None:
        model = load_model()

    label, confidence = model.predict(text)

    result = {
        "label": label,
        "confidence": confidence,
        "text": text,
    }
    return result
92
+
93
+ # ๋ชจ๋ธ ์ดˆ๊ธฐ ๋กœ๋“œ
94
+ if __name__ == "__main__":
95
+ load_model()