hyeji committed on
Commit
0fde84f
·
1 Parent(s): 8abc42c

second commit

Browse files
Files changed (2) hide show
  1. KoELECTRA.py +118 -0
  2. portToHub.py +11 -5
KoELECTRA.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from torch.nn import CrossEntropyLoss, MSELoss
4
+ from transformers.activations import get_activation
5
+ from transformers import (
6
+ ElectraPreTrainedModel,
7
+ ElectraModel,
8
+ ElectraConfig,
9
+ ElectraTokenizer,
10
+ BertConfig,
11
+ BertTokenizer
12
+ )
13
+
14
+ # MODEL_CLASSES = {
15
+ # "koelectra-base": (ElectraConfig, koElectraForSequenceClassification, ElectraTokenizer),
16
+ # "koelectra-small": (ElectraConfig, koElectraForSequenceClassification, ElectraTokenizer),
17
+ # "koelectra-base-v2": (ElectraConfig, koElectraForSequenceClassification, ElectraTokenizer),
18
+ # "koelectra-small-v2": (ElectraConfig, koElectraForSequenceClassification, ElectraTokenizer),
19
+ # }
20
+
21
+
22
+ # def load_tokenizer(args):
23
+ # return MODEL_CLASSES[args.model_type][2].from_pretrained(args.model_name_or_path)
24
+
25
+
26
class ElectraClassificationHead(nn.Module):
    """Head for sentence-level classification tasks.

    Projects the first-token representation through a widened hidden
    layer (4x hidden_size, GELU) and down to ``num_labels`` logits.
    """

    def __init__(self, config, num_labels):
        super().__init__()
        wide = 4 * config.hidden_size
        self.dense = nn.Linear(config.hidden_size, wide)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.out_proj = nn.Linear(wide, num_labels)

    def forward(self, features, **kwargs):
        # First token (<s>, equivalent to [CLS]) summarizes the sequence.
        cls_repr = features[:, 0, :]
        hidden = self.dense(self.dropout(cls_repr))
        # Although BERT uses tanh here, the ELECTRA authors used GELU.
        hidden = nn.functional.gelu(hidden)
        return self.out_proj(self.dropout(hidden))
43
+
44
class koElectraForSequenceClassification(ElectraPreTrainedModel):
    """ELECTRA discriminator with a sequence-classification head.

    Constructor signature ``(config, num_labels)`` is kept unchanged so
    existing callers and saved checkpoints keep working.
    """
    def __init__(self,
                 config,
                 num_labels):
        super().__init__(config)
        self.num_labels = num_labels
        self.electra = ElectraModel(config)
        self.classifier = ElectraClassificationHead(config, num_labels)

        self.init_weights()

    def forward(
            self,
            input_ids=None,
            attention_mask=None,
            token_type_ids=None,
            position_ids=None,
            head_mask=None,
            inputs_embeds=None,
            labels=None,
            output_attentions=None,
            output_hidden_states=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`, defaults to :obj:`None`):
            Labels for computing the sequence classification/regression loss.
            Indices should be in :obj:`[0, ..., config.num_labels - 1]`.
            If :obj:`config.num_labels == 1` a regression loss is computed (Mean-Square loss),
            If :obj:`config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        # Pass arguments by KEYWORD: ElectraModel.forward gained extra
        # parameters (e.g. encoder_hidden_states) in newer transformers
        # releases, so positional passing silently misroutes
        # output_attentions / output_hidden_states into the wrong slots.
        discriminator_hidden_states = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
        )

        sequence_output = discriminator_hidden_states[0]
        logits = self.classifier(sequence_output)

        # Keep optional hidden states / attentions behind the logits.
        outputs = (logits,) + discriminator_hidden_states[1:]

        if labels is not None:
            if self.num_labels == 1:
                # A single label means regression: mean-squared error.
                loss_fct = MSELoss()
                loss = loss_fct(logits.view(-1), labels.view(-1))
            else:
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            outputs = (loss,) + outputs

        return outputs  # (loss), (logits), (hidden_states), (attentions)
100
+
101
def koelectra_input(tokenizer, str, device=None, max_seq_len=512):
    """Tokenize one sentence into fixed-length, padded model inputs.

    Args:
        tokenizer: object exposing ``encode`` (e.g. ElectraTokenizer).
        str: input sentence. Name kept for backward compatibility even
            though it shadows the builtin ``str``.
        device: torch device the tensors are moved to (None = default).
        max_seq_len: fixed output length; shorter inputs are zero-padded,
            longer inputs are truncated.

    Returns:
        dict with 'input_ids' and 'attention_mask' tensors of shape
        (1, max_seq_len).
    """
    index_of_words = tokenizer.encode(str)
    # Truncate over-long inputs. The original code computed a negative
    # padding length here, so [0] * padding_length was empty and the
    # returned tensors exceeded max_seq_len, breaking the fixed-length
    # contract downstream.
    if len(index_of_words) > max_seq_len:
        index_of_words = index_of_words[:max_seq_len]
    attention_mask = [1] * len(index_of_words)

    # Zero-pad up to the fixed length (mask 0 marks padding positions).
    padding_length = max_seq_len - len(index_of_words)
    index_of_words += [0] * padding_length
    attention_mask += [0] * padding_length

    data = {
        'input_ids': torch.tensor([index_of_words]).to(device),
        'attention_mask': torch.tensor([attention_mask]).to(device),
    }
    return data
portToHub.py CHANGED
@@ -1,17 +1,23 @@
import torch

from KoELECTRA import *  # provides koElectraForSequenceClassification
from transformers import ElectraConfig, ElectraModel, ElectraTokenizer

# Fine-tuned checkpoint produced by the training run.
save_ckpt_path = './sentiment-classification.pth'

ctx = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(ctx)

# koElectraForSequenceClassification.__init__ requires (config,
# num_labels); the previous zero-argument call raised TypeError before
# the checkpoint could even be loaded.
# NOTE(review): assumes fine-tuning started from
# monologg/koelectra-base-discriminator with 5 sentiment classes
# ("five-sentiment" in the repo name) -- confirm against training code.
pretrained_name = "monologg/koelectra-base-discriminator"
config = ElectraConfig.from_pretrained(pretrained_name)
model = koElectraForSequenceClassification(config, num_labels=5)

checkpoint = torch.load(save_ckpt_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# ElectraTokenizer() with no vocab file raises; load the matching
# pretrained vocabulary instead.
tokenizer = ElectraTokenizer.from_pretrained(pretrained_name)

## repo
MODEL_SAVE_REPO = 'Koelectra-five-sentiment-classification'  # ex) 'my-bert-fine-tuned'