import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import json
import os
from transformers import PreTrainedModel, PretrainedConfig, XLMRobertaModel, XLMRobertaConfig
class XLMSteelConfig(PretrainedConfig):
    """Configuration for the XLM-RoBERTa steel classifier.

    Carries the number of output labels; any further fields (id2label,
    label2id, feature_names, input_size, ...) arrive through **kwargs and
    are handled by the PretrainedConfig base class.
    """

    model_type = "xlm_steel_classifier"

    def __init__(self, num_labels=66, **kwargs):
        # Let the base class consume the generic HF config kwargs first.
        super().__init__(**kwargs)
        self.num_labels = num_labels
class XLMIntegratedModel(PreTrainedModel):
    """XLM-RoBERTa + TF-IDF integrated model.

    Wraps a pretrained XLM-RoBERTa encoder alongside a small MLP head that
    classifies a TF-IDF-style bag-of-ids vector built from the input ids.

    NOTE(review): the classifier head consumes ONLY the TF-IDF branch; the
    XLM-RoBERTa pooled output is computed but never fed into the head.
    That matches the original implementation and is preserved here.
    """
    config_class = XLMSteelConfig

    def __init__(self, config):
        super().__init__(config)
        # Pretrained XLM-RoBERTa encoder (downloaded on first use).
        self.xlm_roberta = XLMRobertaModel.from_pretrained('xlm-roberta-base')
        # TF-IDF vectorizer metadata carried on the config.
        self.feature_names = getattr(config, 'feature_names', [])
        self.input_size = getattr(config, 'input_size', 3000)
        # Classifier head over the TF-IDF vector (layout of the original
        # TF-IDF-only model, so its checkpoints remain loadable).
        self.fc1 = nn.Linear(self.input_size, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, config.num_labels)
        self.dropout = nn.Dropout(0.3)
        # Label mapping (id -> label) taken verbatim from the config.
        self.id2label = config.id2label
        self.num_classes = config.num_labels
        # Feature-name fingerprints stored as a buffer so they travel with
        # the state_dict.  NOTE(review): built-in hash() is salted per
        # interpreter run (PYTHONHASHSEED), so these values differ between
        # processes — confirm nothing uses them for cross-run lookups.
        self.register_buffer(
            'feature_names_list',
            torch.tensor([hash(f) for f in self.feature_names], dtype=torch.long),
        )

    def forward(self, input_ids=None, attention_mask=None, labels=None, **kwargs):
        """Integrated forward pass.

        Args:
            input_ids: (batch, seq_len) LongTensor of token ids, or None.
            attention_mask: optional mask forwarded to the encoder.
            labels: optional (batch,) class indices; when given, a
                cross-entropy loss is returned alongside the logits.

        Returns:
            dict with "logits" (batch, num_labels) and, when labels were
            supplied, "loss".
        """
        # Create all tensors on the same device as the classifier weights
        # (the original always built them on CPU, breaking GPU models).
        device = self.fc1.weight.device
        # XLM-RoBERTa encoding — see class docstring: informational only.
        if input_ids is not None:
            xlm_outputs = self.xlm_roberta(
                input_ids=input_ids,
                attention_mask=attention_mask,
                return_dict=True
            )
            xlm_features = xlm_outputs.pooler_output
        else:
            xlm_features = torch.zeros(
                1, self.xlm_roberta.config.hidden_size, device=device
            )
        # TF-IDF-style vectorization performed internally.
        if input_ids is not None:
            # Bug fix: vectorize EVERY row of the batch.  The original used
            # only input_ids[0], silently dropping samples 1..B-1 and
            # producing batch-1 logits that mismatch batched labels.
            vectors = np.stack([self._vectorize_from_ids(row) for row in input_ids])
            tfidf_features = torch.as_tensor(vectors, dtype=torch.float32, device=device)
        else:
            tfidf_features = torch.zeros(1, self.input_size, device=device)
        # Classifier head (TF-IDF branch only).
        x = F.relu(self.fc1(tfidf_features))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        logits = self.fc3(x)
        # Loss computation (training path).
        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.config.num_labels), labels.view(-1))
        return {"loss": loss, "logits": logits} if loss is not None else {"logits": logits}

    def _vectorize_from_ids(self, input_ids):
        """Convert a 1-D sequence of token ids into an L1-normalized
        bag-of-ids vector of length self.input_size.

        Ids >= input_size are ignored; an all-zero count vector is
        returned unnormalized to avoid division by zero.
        """
        vector = np.zeros(self.input_size)
        # Accept both tensors and plain Python sequences.
        if torch.is_tensor(input_ids):
            input_ids = input_ids.tolist()
        for token_id in input_ids:
            if token_id < self.input_size:
                vector[token_id] += 1
        total = np.sum(vector)
        if total > 0:
            vector = vector / total
        return vector
# Module-level singleton: the loaded model, populated lazily by load_model().
model = None
def load_model():
    """Build the classifier from config.json and restore its trained weights.

    Reads ``config.json`` from the current working directory, constructs an
    XLMSteelConfig / XLMIntegratedModel pair, loads the state dict from
    ``xlm_integrated_model.bin`` (CPU-mapped), switches to eval mode, caches
    the instance in the module-level ``model`` global, and returns it.
    """
    global model
    cwd = os.getcwd()
    # Parse the serialized configuration.
    with open(os.path.join(cwd, "config.json"), 'r', encoding='utf-8') as f:
        cfg = json.load(f)
    steel_config = XLMSteelConfig(
        num_labels=cfg['num_labels'],
        id2label=cfg['id2label'],
        label2id=cfg['label2id'],
        feature_names=cfg.get('feature_names', []),
        input_size=cfg.get('input_size', 3000),
    )
    # Instantiate and restore weights.
    model = XLMIntegratedModel(steel_config)
    # NOTE(review): torch.load is pickle-based — only load trusted checkpoints.
    state_dict = torch.load(
        os.path.join(cwd, "xlm_integrated_model.bin"), map_location='cpu'
    )
    model.load_state_dict(state_dict)
    model.eval()
    return model
def predict(inputs):
    """Predict a steel-classification label for a text input.

    Args:
        inputs: a raw string, a non-empty list of strings (only the first
            element is used), a dict with an "inputs" key, or anything
            else (str()-coerced).

    Returns:
        dict with "label" (predicted class name), "confidence" (softmax
        probability of that class), and "text" (the normalized input).

    Loads the model lazily on first call via load_model().
    """
    import zlib  # function-scope: stable hashing without touching module imports

    global model
    if model is None:
        model = load_model()
    # Normalize the input into a single text string.
    if isinstance(inputs, str):
        text = inputs
    elif isinstance(inputs, list):
        text = inputs[0] if len(inputs) > 0 else ""
    elif isinstance(inputs, dict) and "inputs" in inputs:
        text = inputs["inputs"]
    else:
        text = str(inputs)
    # Simple whitespace tokenization mapped into a 50,000-id space
    # (XLM-RoBERTa-sized vocab, as in the original).
    # Bug fix: the original used built-in hash(), which is salted per
    # interpreter run (PYTHONHASHSEED), so the same text produced different
    # ids — and different predictions — after every restart.  zlib.crc32 is
    # deterministic across runs and platforms.
    tokens = text.lower().split()
    if not tokens:
        # Guard: avoid a zero-length sequence reaching the model.
        tokens = [""]
    input_ids = torch.tensor(
        [[zlib.crc32(tok.encode("utf-8")) % 50000 for tok in tokens]]
    )
    attention_mask = torch.ones_like(input_ids)
    # Inference.
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs["logits"]
        probabilities = F.softmax(logits, dim=1)
        predicted_class = torch.argmax(probabilities, dim=1).item()
        label = model.id2label[str(predicted_class)]
        confidence = probabilities[0][predicted_class].item()
    return {
        "label": label,
        "confidence": confidence,
        "text": text
    }
# Initial model load when the module is executed directly as a script.
if __name__ == "__main__":
    load_model()