Update modeling.py
Browse files- modeling.py +41 -45
modeling.py
CHANGED
|
@@ -1,51 +1,47 @@
|
|
| 1 |
import torch
|
| 2 |
import torch.nn as nn
|
| 3 |
-
|
| 4 |
-
from transformers
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
self.
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
def
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
outputs = self.roberta(
|
| 28 |
-
input_ids=input_ids,
|
| 29 |
-
attention_mask=attention_mask,
|
| 30 |
-
token_type_ids=token_type_ids,
|
| 31 |
)
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
logits = self.classifier(pooled_output)
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
if self.num_labels == 1:
|
| 40 |
-
loss_fct = nn.MSELoss()
|
| 41 |
-
loss = loss_fct(logits.squeeze(), labels.squeeze())
|
| 42 |
-
else:
|
| 43 |
-
loss_fct = nn.BCEWithLogitsLoss()
|
| 44 |
-
loss = loss_fct(logits, labels.float())
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import torch
|
| 2 |
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
from transformers import PreTrainedModel, AutoModel, AutoConfig, ModelOutput
|
| 5 |
+
|
| 6 |
+
class Encoder(nn.Module):
    """Wraps a transformer backbone and emits L2-normalized sentence
    embeddings obtained by masked mean pooling of the final hidden layer."""

    def __init__(self, base_encoder):
        super().__init__()
        self.encoder = base_encoder

    def forward(self, inputs):
        # Run the backbone; hidden_states is needed to reach the last layer.
        backbone_out = self.encoder(**inputs, output_hidden_states=True)
        token_states = backbone_out.hidden_states[-1]
        # Zero out padding positions before averaging over the sequence axis.
        pad_mask = inputs["attention_mask"].unsqueeze(-1).float()
        summed = (token_states * pad_mask).sum(1)
        counts = pad_mask.sum(1).clamp(min=1e-9)  # guard against all-pad rows
        mean_pooled = summed / counts
        # Unit-length embeddings, ready for cosine-similarity use.
        return F.normalize(mean_pooled, p=2, dim=1)
|
| 17 |
+
|
| 18 |
+
class Classifier(nn.Module):
    """MLP classification head: Linear -> LayerNorm -> GELU -> Dropout -> Linear."""

    def __init__(self, input_dim=768, num_classes=28):
        super().__init__()
        hidden_dim = 512
        # Layer order matters for both semantics and seeded weight init.
        self.mlp = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(0.25),
            nn.Linear(hidden_dim, num_classes),
        )

    def forward(self, x):
        """Map pooled embeddings of shape (batch, input_dim) to class logits."""
        logits = self.mlp(x)
        return logits
|
|
|
|
| 31 |
|
| 32 |
+
class RobertaEmoPillars(PreTrainedModel):
    """Emotion classifier: masked-mean-pooled, L2-normalized encoder embeddings
    fed to an MLP head producing ``config.num_labels`` logits."""

    config_class = AutoConfig

    def __init__(self, config):
        super().__init__(config)
        # IMPORTANT: from_config builds the architecture only — weights come
        # from post_init()/from_pretrained, not a checkpoint download here.
        base_encoder = AutoModel.from_config(config)
        self.encoder = Encoder(base_encoder)
        self.classifier = Classifier(input_dim=base_encoder.config.hidden_size,
                                     num_classes=config.num_labels)
        self.post_init()  # HF-standard weight init

    def forward(self, input_ids=None, attention_mask=None, labels=None):
        """Compute logits, and a loss when ``labels`` is supplied.

        Args:
            input_ids: token-id tensor — presumably (batch, seq); TODO confirm.
            attention_mask: 1 for real tokens, 0 for padding, same shape as
                ``input_ids``.
            labels: optional targets. Multi-hot floats of shape
                (batch, num_labels) for the multi-label case, or regression
                targets when ``config.num_labels == 1``.

        Returns:
            ModelOutput with ``logits`` and, if labels were given, ``loss``.
        """
        inputs = {"input_ids": input_ids, "attention_mask": attention_mask}
        emb = self.encoder(inputs)
        logits = self.classifier(emb)

        if labels is None:
            # Keep the label-free return shape identical to the prior behavior.
            return ModelOutput(logits=logits)

        # Restore the training objective from the previous revision of this
        # file: MSE regression for a single label, otherwise multi-label
        # binary cross-entropy on raw logits.
        if self.config.num_labels == 1:
            loss = nn.MSELoss()(logits.squeeze(), labels.squeeze())
        else:
            loss = nn.BCEWithLogitsLoss()(logits, labels.float())
        return ModelOutput(loss=loss, logits=logits)
|