Commit ·
f566ec9
1
Parent(s): 8df7fbc
Upload roberta_layers.py
Browse files- roberta_layers.py +8 -8
roberta_layers.py
CHANGED
|
@@ -584,7 +584,7 @@ class RobertaModel(RobertaPreTrainedModel):
|
|
| 584 |
to the last attention block of shape [batch_size, sequence_length, hidden_size],
|
| 585 |
`pooled_output`: a torch.FloatTensor of size [batch_size, hidden_size] which is the output of a
|
| 586 |
classifier pretrained on top of the hidden state associated to the first character of the
|
| 587 |
-
input (`
|
| 588 |
Example usage:
|
| 589 |
```python
|
| 590 |
# Already been converted into WordPiece token ids
|
|
@@ -747,7 +747,7 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
|
|
| 747 |
self.post_init()
|
| 748 |
|
| 749 |
@classmethod
|
| 750 |
-
def from_composer(
|
| 751 |
pretrained_checkpoint,
|
| 752 |
state_dict=None,
|
| 753 |
cache_dir=None,
|
|
@@ -756,7 +756,7 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
|
|
| 756 |
*inputs,
|
| 757 |
**kwargs):
|
| 758 |
"""Load from pre-trained."""
|
| 759 |
-
model =
|
| 760 |
if from_tf:
|
| 761 |
raise ValueError(
|
| 762 |
'Mosaic BERT does not support loading TensorFlow weights.')
|
|
@@ -779,10 +779,10 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
|
|
| 779 |
return model
|
| 780 |
|
| 781 |
def get_output_embeddings(self):
|
| 782 |
-
return self.
|
| 783 |
|
| 784 |
def set_output_embeddings(self, new_embeddings):
|
| 785 |
-
self.
|
| 786 |
|
| 787 |
def forward(
|
| 788 |
self,
|
|
@@ -836,7 +836,7 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
|
|
| 836 |
)
|
| 837 |
|
| 838 |
sequence_output = outputs[0]
|
| 839 |
-
prediction_scores = self.
|
| 840 |
|
| 841 |
loss = None
|
| 842 |
if labels is not None:
|
|
@@ -916,7 +916,7 @@ class RobertaForSequenceClassification(RobertaPreTrainedModel):
|
|
| 916 |
self.post_init()
|
| 917 |
|
| 918 |
@classmethod
|
| 919 |
-
def from_composer(
|
| 920 |
pretrained_checkpoint,
|
| 921 |
state_dict=None,
|
| 922 |
cache_dir=None,
|
|
@@ -925,7 +925,7 @@ class RobertaForSequenceClassification(RobertaPreTrainedModel):
|
|
| 925 |
*inputs,
|
| 926 |
**kwargs):
|
| 927 |
"""Load from pre-trained."""
|
| 928 |
-
model =
|
| 929 |
if from_tf:
|
| 930 |
raise ValueError(
|
| 931 |
'Mosaic BERT does not support loading TensorFlow weights.')
|
|
|
|
| 584 |
to the last attention block of shape [batch_size, sequence_length, hidden_size],
|
| 585 |
`pooled_output`: a torch.FloatTensor of size [batch_size, hidden_size] which is the output of a
|
| 586 |
classifier pretrained on top of the hidden state associated to the first character of the
|
| 587 |
+
input (`CLS`) to train on the Next-Sentence task (see BERT's paper).
|
| 588 |
Example usage:
|
| 589 |
```python
|
| 590 |
# Already been converted into WordPiece token ids
|
|
|
|
| 747 |
self.post_init()
|
| 748 |
|
| 749 |
@classmethod
|
| 750 |
+
def from_composer(lm_head,
|
| 751 |
pretrained_checkpoint,
|
| 752 |
state_dict=None,
|
| 753 |
cache_dir=None,
|
|
|
|
| 756 |
*inputs,
|
| 757 |
**kwargs):
|
| 758 |
"""Load from pre-trained."""
|
| 759 |
+
model = lm_head(config, *inputs, **kwargs)
|
| 760 |
if from_tf:
|
| 761 |
raise ValueError(
|
| 762 |
'Mosaic BERT does not support loading TensorFlow weights.')
|
|
|
|
| 779 |
return model
|
| 780 |
|
| 781 |
def get_output_embeddings(self):
|
| 782 |
+
return self.lm_head.predictions.decoder
|
| 783 |
|
| 784 |
def set_output_embeddings(self, new_embeddings):
|
| 785 |
+
self.lm_head.predictions.decoder = new_embeddings
|
| 786 |
|
| 787 |
def forward(
|
| 788 |
self,
|
|
|
|
| 836 |
)
|
| 837 |
|
| 838 |
sequence_output = outputs[0]
|
| 839 |
+
prediction_scores = self.lm_head(sequence_output)
|
| 840 |
|
| 841 |
loss = None
|
| 842 |
if labels is not None:
|
|
|
|
| 916 |
self.post_init()
|
| 917 |
|
| 918 |
@classmethod
|
| 919 |
+
def from_composer(lm_head,
|
| 920 |
pretrained_checkpoint,
|
| 921 |
state_dict=None,
|
| 922 |
cache_dir=None,
|
|
|
|
| 925 |
*inputs,
|
| 926 |
**kwargs):
|
| 927 |
"""Load from pre-trained."""
|
| 928 |
+
model = lm_head(config, *inputs, **kwargs)
|
| 929 |
if from_tf:
|
| 930 |
raise ValueError(
|
| 931 |
'Mosaic BERT does not support loading TensorFlow weights.')
|