annnli committed
Commit 4991f82 · verified · 1 Parent(s): d44be9c

Upload folder using huggingface_hub
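The commit message indicates this revision was produced by pushing a local folder with the huggingface_hub client. A minimal sketch of how such an upload is typically done; the repo id and local path below are placeholders, not values taken from this page:

# Hedged sketch: pushing a local folder to the Hub with huggingface_hub.
# repo_id and folder_path are hypothetical placeholders, not read from this commit.
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    repo_id="annnli/<repo-name>",   # placeholder; the actual repo id is not shown here
    folder_path="./model_dir",      # local folder containing modeling_roberta_cl.py, config, weights
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)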

Files changed (1)
  1. modeling_roberta_cl.py +3 -66
modeling_roberta_cl.py CHANGED
@@ -2,35 +2,10 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.distributed as dist
-from torch import Tensor
 
 import transformers
-from transformers import RobertaTokenizer
-from transformers.models.roberta.modeling_roberta import RobertaForSequenceClassification, RobertaClassificationHead, RobertaLMHead
-from transformers.activations import gelu
-from transformers.file_utils import (
-    add_code_sample_docstrings,
-    add_start_docstrings,
-    add_start_docstrings_to_model_forward,
-    replace_return_docstrings,
-)
-from transformers.modeling_outputs import SequenceClassifierOutput, BaseModelOutputWithPoolingAndCrossAttentions
-
-class MLPLayer(nn.Module):
-    """
-    Head for getting sentence representations over RoBERTa/BERT's CLS representation.
-    """
-
-    def __init__(self, config):
-        super().__init__()
-        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
-        self.activation = nn.Tanh()
-
-    def forward(self, features, **kwargs):
-        x = self.dense(features)
-        x = self.activation(x)
-
-        return x
+from transformers.models.roberta.modeling_roberta import RobertaForSequenceClassification, RobertaClassificationHead
+from transformers.modeling_outputs import BaseModelOutputWithPoolingAndCrossAttentions
 
 class ResidualBlock(nn.Module):
     def __init__(self, dim):
@@ -129,8 +104,6 @@ def cl_forward(cls,
     output_attentions=None,
     output_hidden_states=None,
     return_dict=None,
-    mlm_input_ids=None,
-    mlm_labels=None,
     latter_sentiment_spoof_mask=None,
 ):
     return_dict = return_dict if return_dict is not None else cls.config.use_return_dict
@@ -139,7 +112,6 @@ def cl_forward(cls,
     # original + cls.model_args.num_paraphrased + cls.model_args.num_negative
     num_sent = input_ids.size(1)
 
-    mlm_outputs = None
     # Flatten input for encoding
     input_ids = input_ids.view((-1, input_ids.size(-1))) # (bs * num_sent, len)
     attention_mask = attention_mask.view((-1, attention_mask.size(-1))) # (bs * num_sent len)
@@ -159,21 +131,6 @@ def cl_forward(cls,
         return_dict=True,
     )
 
-    # MLM auxiliary objective
-    if mlm_input_ids is not None:
-        mlm_input_ids = mlm_input_ids.view((-1, mlm_input_ids.size(-1)))
-        mlm_outputs = cls.roberta(
-            mlm_input_ids,
-            attention_mask=attention_mask,
-            token_type_ids=token_type_ids,
-            position_ids=position_ids,
-            head_mask=head_mask,
-            inputs_embeds=inputs_embeds,
-            output_attentions=output_attentions,
-            output_hidden_states=False,
-            return_dict=True,
-        )
-
     # Pooling
     sequence_output = outputs[0] # (bs*num_sent, seq_len, hidden)
     pooler_output = cls.classifier(sequence_output) # (bs*num_sent, hidden)
@@ -216,14 +173,6 @@ def cl_forward(cls,
             loss_triplet += F.relu(cls.sim(ori, neg) * cls.model_args.temp - cls.sim(ori, pos) * cls.model_args.temp + cls.model_args.margin)
     loss_triplet /= (batch_size * cls.model_args.num_paraphrased * len(spoofing_cnames))
 
-    # Calculate loss for MLM
-    if mlm_outputs is not None and mlm_labels is not None:
-        raise NotImplementedError
-        # mlm_labels = mlm_labels.view(-1, mlm_labels.size(-1))
-        # prediction_scores = cls.lm_head(mlm_outputs.last_hidden_state)
-        # masked_lm_loss = loss_fct(prediction_scores.view(-1, cls.config.vocab_size), mlm_labels.view(-1))
-        # loss_cl = loss_cl + cls.model_args.mlm_weight * masked_lm_loss
-
     # Calculate loss for uniform perturbation and unbiased token preference
     def sign_loss(x):
         row = torch.abs(torch.mean(torch.mean(x, dim=0)))
@@ -331,8 +280,7 @@ class RobertaForCL(RobertaForSequenceClassification):
 
         self.classifier = RobertaClassificationHeadForEmbedding(config)
 
-        if self.model_args and getattr(self.model_args, "do_mlm", False):
-            self.lm_head = RobertaLMHead(config)
+        if self.model_args:
         cl_init(self, config)
 
         self.map = SemanticModel(input_dim=768)
@@ -340,13 +288,6 @@ class RobertaForCL(RobertaForSequenceClassification):
         # Initialize weights and apply final processing
         self.post_init()
 
-    def initialize_mlp_weights(self, pretrained_model_state_dict):
-        """
-        Initialize MLP weights using the pretrained classifier's weights.
-        """
-        self.mlp.dense.weight.data = pretrained_model_state_dict.classifier.dense.weight.data.clone()
-        self.mlp.dense.bias.data = pretrained_model_state_dict.classifier.dense.bias.data.clone()
-
     def forward(self,
         input_ids=None,
         attention_mask=None,
@@ -359,8 +300,6 @@ class RobertaForCL(RobertaForSequenceClassification):
         output_hidden_states=None,
         return_dict=None,
         sent_emb=False,
-        mlm_input_ids=None,
-        mlm_labels=None,
         latter_sentiment_spoof_mask=None,
     ):
         if sent_emb:
@@ -388,8 +327,6 @@ class RobertaForCL(RobertaForSequenceClassification):
                 output_attentions=output_attentions,
                 output_hidden_states=output_hidden_states,
                 return_dict=return_dict,
-                mlm_input_ids=mlm_input_ids,
-                mlm_labels=mlm_labels,
                 latter_sentiment_spoof_mask=latter_sentiment_spoof_mask,
             )
 