File size: 1,602 Bytes
c977434
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# models/model_loader.py
import torch
import torch.nn as nn
from transformers import ElectraModel, AutoTokenizer

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

class KOTEtagger(nn.Module):
    """
    KcELECTRA encoder + linear head: multi-label emotion classifier (44 labels).

    Weight file: kote_pytorch_lightning.bin, loaded with strict=False by
    load_kote_model (the Lightning checkpoint's key names do not all match
    this module, so mismatched keys are deliberately ignored).
    """
    def __init__(self, model_name="beomi/KcELECTRA-base", revision='v2021', num_labels=44):
        super().__init__()
        # Pretrained encoder and its matching tokenizer, pinned to one revision
        # so tokenization stays consistent with the fine-tuned weights.
        self.electra = ElectraModel.from_pretrained(model_name, revision=revision)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, revision=revision)
        # One logit per emotion label, computed from the [CLS] representation.
        self.classifier = nn.Linear(self.electra.config.hidden_size, num_labels)

    def forward(self, text: str) -> torch.Tensor:
        """
        Tokenize *text* and return per-label probabilities.

        Returns a (1, num_labels) tensor of independent sigmoid probabilities
        (multi-label: rows do not sum to 1).
        """
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=128,  # inputs longer than 128 tokens are truncated
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        # Fix: follow the device this module actually lives on rather than the
        # module-level DEVICE global. Previously, moving the model after
        # construction (e.g. model.to("cpu") on a CUDA machine, or to cuda:1)
        # sent inputs to the wrong device and crashed the forward pass.
        device = self.classifier.weight.device
        input_ids = encoding["input_ids"].to(device)
        attention_mask = encoding["attention_mask"].to(device)
        outputs = self.electra(input_ids, attention_mask=attention_mask)
        cls = outputs.last_hidden_state[:, 0, :]  # [CLS] token embedding
        logits = self.classifier(cls)
        return torch.sigmoid(logits)

def load_kote_model(weight_path="kote_pytorch_lightning.bin"):
    """
    Build a KOTEtagger, load fine-tuned weights, and return it in eval mode.

    strict=False is intentional: the Lightning checkpoint contains keys that
    do not map one-to-one onto this module, so mismatches are ignored rather
    than raised.
    """
    tagger = KOTEtagger().to(DEVICE)
    checkpoint = torch.load(weight_path, map_location=DEVICE)
    tagger.load_state_dict(checkpoint, strict=False)
    return tagger.eval()