adalbertojunior committed on
Commit
f566ec9
·
1 Parent(s): 8df7fbc

Upload roberta_layers.py

Browse files
Files changed (1) hide show
  1. roberta_layers.py +8 -8
roberta_layers.py CHANGED
@@ -584,7 +584,7 @@ class RobertaModel(RobertaPreTrainedModel):
584
  to the last attention block of shape [batch_size, sequence_length, hidden_size],
585
  `pooled_output`: a torch.FloatTensor of size [batch_size, hidden_size] which is the output of a
586
  classifier pretrained on top of the hidden state associated to the first character of the
587
- input (`CLS`) to train on the Next-Sentence task (see BERT's paper).
588
  Example usage:
589
  ```python
590
  # Already been converted into WordPiece token ids
@@ -747,7 +747,7 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
747
  self.post_init()
748
 
749
  @classmethod
750
- def from_composer(cls,
751
  pretrained_checkpoint,
752
  state_dict=None,
753
  cache_dir=None,
@@ -756,7 +756,7 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
756
  *inputs,
757
  **kwargs):
758
  """Load from pre-trained."""
759
- model = cls(config, *inputs, **kwargs)
760
  if from_tf:
761
  raise ValueError(
762
  'Mosaic BERT does not support loading TensorFlow weights.')
@@ -779,10 +779,10 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
779
  return model
780
 
781
  def get_output_embeddings(self):
782
- return self.cls.predictions.decoder
783
 
784
  def set_output_embeddings(self, new_embeddings):
785
- self.cls.predictions.decoder = new_embeddings
786
 
787
  def forward(
788
  self,
@@ -836,7 +836,7 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
836
  )
837
 
838
  sequence_output = outputs[0]
839
- prediction_scores = self.cls(sequence_output)
840
 
841
  loss = None
842
  if labels is not None:
@@ -916,7 +916,7 @@ class RobertaForSequenceClassification(RobertaPreTrainedModel):
916
  self.post_init()
917
 
918
  @classmethod
919
- def from_composer(cls,
920
  pretrained_checkpoint,
921
  state_dict=None,
922
  cache_dir=None,
@@ -925,7 +925,7 @@ class RobertaForSequenceClassification(RobertaPreTrainedModel):
925
  *inputs,
926
  **kwargs):
927
  """Load from pre-trained."""
928
- model = cls(config, *inputs, **kwargs)
929
  if from_tf:
930
  raise ValueError(
931
  'Mosaic BERT does not support loading TensorFlow weights.')
 
584
  to the last attention block of shape [batch_size, sequence_length, hidden_size],
585
  `pooled_output`: a torch.FloatTensor of size [batch_size, hidden_size] which is the output of a
586
  classifier pretrained on top of the hidden state associated to the first character of the
587
+ input (`lm_head`) to train on the Next-Sentence task (see BERT's paper).
588
  Example usage:
589
  ```python
590
  # Already been converted into WordPiece token ids
 
747
  self.post_init()
748
 
749
  @classmethod
750
+ def from_composer(lm_head,
751
  pretrained_checkpoint,
752
  state_dict=None,
753
  cache_dir=None,
 
756
  *inputs,
757
  **kwargs):
758
  """Load from pre-trained."""
759
+ model = lm_head(config, *inputs, **kwargs)
760
  if from_tf:
761
  raise ValueError(
762
  'Mosaic BERT does not support loading TensorFlow weights.')
 
779
  return model
780
 
781
  def get_output_embeddings(self):
782
+ return self.lm_head.predictions.decoder
783
 
784
  def set_output_embeddings(self, new_embeddings):
785
+ self.lm_head.predictions.decoder = new_embeddings
786
 
787
  def forward(
788
  self,
 
836
  )
837
 
838
  sequence_output = outputs[0]
839
+ prediction_scores = self.lm_head(sequence_output)
840
 
841
  loss = None
842
  if labels is not None:
 
916
  self.post_init()
917
 
918
  @classmethod
919
+ def from_composer(lm_head,
920
  pretrained_checkpoint,
921
  state_dict=None,
922
  cache_dir=None,
 
925
  *inputs,
926
  **kwargs):
927
  """Load from pre-trained."""
928
+ model = lm_head(config, *inputs, **kwargs)
929
  if from_tf:
930
  raise ValueError(
931
  'Mosaic BERT does not support loading TensorFlow weights.')