Commit 35a3983
Parent(s): cb68421
Upload roberta_layers.py

roberta_layers.py CHANGED (+11 -11)
@@ -34,7 +34,7 @@ try:
 except ImportError as e:
     xformers_available=False
 
-logger = logging.getLogger(__name__)
+# logger = logging.getLogger(__name__)
 
 _CHECKPOINT_FOR_DOC = "roberta-base"
 _CONFIG_FOR_DOC = "RobertaConfig"
@@ -479,9 +479,9 @@ class RobertaEncoder(nn.Module):
 
         if self.gradient_checkpointing and self.training:
             if use_cache:
-                logger.warning_once(
-                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                )
+                # logger.warning_once(
+                #     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                # )
                 use_cache = False
 
         next_decoder_cache = () if use_cache else None
@@ -873,8 +873,8 @@ class RobertaForCausalLM(RobertaPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
 
-        if not config.is_decoder:
-            logger.warning("If you want to use `RobertaLMHeadModel` as a standalone, add `is_decoder=True.`")
+        # if not config.is_decoder:
+        #     logger.warning("If you want to use `RobertaLMHeadModel` as a standalone, add `is_decoder=True.`")
 
         self.roberta = RobertaModel(config, add_pooling_layer=False)
         self.lm_head = RobertaLMHead(config)
@@ -1027,11 +1027,11 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
 
-        if config.is_decoder:
-            logger.warning(
-                "If you want to use `RobertaForMaskedLM` make sure `config.is_decoder=False` for "
-                "bi-directional self-attention."
-            )
+        # if config.is_decoder:
+        #     logger.warning(
+        #         "If you want to use `RobertaForMaskedLM` make sure `config.is_decoder=False` for "
+        #         "bi-directional self-attention."
+        #     )
 
         self.roberta = RobertaModel(config, add_pooling_layer=False)
         self.lm_head = RobertaLMHead(config)
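Taken together, the changed lines comment out the module-level logger definition and every warning that referenced it, while keeping the surrounding behavior intact. Since `logger` is no longer defined, any call site left active would raise a NameError at runtime, which is presumably why all of the call sites are commented out along with the definition. Below is a minimal, self-contained sketch of the post-commit behavior in the gradient-checkpointing branch of RobertaEncoder; the helper name resolve_use_cache and the bare flags are hypothetical stand-ins for the inline attributes and arguments, not part of the commit:

# Sketch only: after this commit, use_cache is still silently forced to False
# under gradient checkpointing during training; only the warning_once message
# is gone. resolve_use_cache is a hypothetical stand-in for the inline logic.
def resolve_use_cache(gradient_checkpointing: bool, training: bool, use_cache: bool) -> bool:
    if gradient_checkpointing and training:
        if use_cache:
            # previously: logger.warning_once("`use_cache=True` is incompatible with gradient checkpointing. ...")
            use_cache = False
    return use_cache

assert resolve_use_cache(True, True, True) is False   # downgraded with no warning emitted
assert resolve_use_cache(False, True, True) is True   # caching left alone when not checkpointing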