Commit
·
8df7fbc1
Parent(s):
ee3d854
Upload roberta_layers.py
Browse files
- roberta_layers.py: +6 -6
roberta_layers.py
CHANGED
|
@@ -739,9 +739,9 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
|
|
| 739 |
'If you want to use `RobertaForMaskedLM` make sure `config.is_decoder=False` for '
|
| 740 |
'bi-directional self-attention.')
|
| 741 |
|
| 742 |
-
self.
|
| 743 |
-
self.
|
| 744 |
-
self.
|
| 745 |
|
| 746 |
# Initialize weights and apply final processing
|
| 747 |
self.post_init()
|
|
@@ -820,7 +820,7 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
|
|
| 820 |
|
| 821 |
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
| 822 |
|
| 823 |
-
outputs = self.
|
| 824 |
input_ids,
|
| 825 |
attention_mask=attention_mask,
|
| 826 |
token_type_ids=token_type_ids,
|
|
@@ -905,7 +905,7 @@ class RobertaForSequenceClassification(RobertaPreTrainedModel):
|
|
| 905 |
self.num_labels = config.num_labels
|
| 906 |
self.config = config
|
| 907 |
|
| 908 |
-
self.
|
| 909 |
classifier_dropout = (config.classifier_dropout
|
| 910 |
if config.classifier_dropout is not None else
|
| 911 |
config.hidden_dropout_prob)
|
|
@@ -969,7 +969,7 @@ class RobertaForSequenceClassification(RobertaPreTrainedModel):
|
|
| 969 |
|
| 970 |
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
| 971 |
|
| 972 |
-
outputs = self.
|
| 973 |
input_ids,
|
| 974 |
attention_mask=attention_mask,
|
| 975 |
token_type_ids=token_type_ids,
|
|
|
|
| 739 |
'If you want to use `RobertaForMaskedLM` make sure `config.is_decoder=False` for '
|
| 740 |
'bi-directional self-attention.')
|
| 741 |
|
| 742 |
+
self.roberta = RobertaModel(config, add_pooling_layer=False)
|
| 743 |
+
self.lm_head = RobertaOnlyMLMHead(config,
|
| 744 |
+
self.roberta.embeddings.word_embeddings.weight)
|
| 745 |
|
| 746 |
# Initialize weights and apply final processing
|
| 747 |
self.post_init()
|
|
|
|
| 820 |
|
| 821 |
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
| 822 |
|
| 823 |
+
outputs = self.roberta(
|
| 824 |
input_ids,
|
| 825 |
attention_mask=attention_mask,
|
| 826 |
token_type_ids=token_type_ids,
|
|
|
|
| 905 |
self.num_labels = config.num_labels
|
| 906 |
self.config = config
|
| 907 |
|
| 908 |
+
self.roberta = RobertaModel(config)
|
| 909 |
classifier_dropout = (config.classifier_dropout
|
| 910 |
if config.classifier_dropout is not None else
|
| 911 |
config.hidden_dropout_prob)
|
|
|
|
| 969 |
|
| 970 |
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
| 971 |
|
| 972 |
+
outputs = self.roberta(
|
| 973 |
input_ids,
|
| 974 |
attention_mask=attention_mask,
|
| 975 |
token_type_ids=token_type_ids,
|