Commit 35a3983
Parent(s): cb68421
Upload roberta_layers.py

roberta_layers.py CHANGED (+11 -11)
@@ -34,7 +34,7 @@ try:
 except ImportError as e:
     xformers_available=False
 
-logger = logging.getLogger(__name__)
+# logger = logging.getLogger(__name__)
 
 _CHECKPOINT_FOR_DOC = "roberta-base"
 _CONFIG_FOR_DOC = "RobertaConfig"
@@ -479,9 +479,9 @@ class RobertaEncoder(nn.Module):
 
         if self.gradient_checkpointing and self.training:
             if use_cache:
-                logger.warning_once(
-                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                )
+                # logger.warning_once(
+                #     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                # )
                 use_cache = False
 
         next_decoder_cache = () if use_cache else None
@@ -873,8 +873,8 @@ class RobertaForCausalLM(RobertaPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
 
-        if not config.is_decoder:
-            logger.warning("If you want to use `RobertaLMHeadModel` as a standalone, add `is_decoder=True.`")
+        # if not config.is_decoder:
+        #     logger.warning("If you want to use `RobertaLMHeadModel` as a standalone, add `is_decoder=True.`")
 
         self.roberta = RobertaModel(config, add_pooling_layer=False)
         self.lm_head = RobertaLMHead(config)
@@ -1027,11 +1027,11 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
 
-        if config.is_decoder:
-            logger.warning(
-                "If you want to use `RobertaForMaskedLM` make sure `config.is_decoder=False` for "
-                "bi-directional self-attention."
-            )
+        # if config.is_decoder:
+        #     logger.warning(
+        #         "If you want to use `RobertaForMaskedLM` make sure `config.is_decoder=False` for "
+        #         "bi-directional self-attention."
+        #     )
 
         self.roberta = RobertaModel(config, add_pooling_layer=False)
         self.lm_head = RobertaLMHead(config)
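Taken together, the changed lines comment out the module-level logger definition and every warning that referenced it, while keeping the surrounding behavior intact. Since `logger` is no longer defined, any call site left active would raise a NameError at runtime, which is presumably why all of the call sites are commented out along with the definition. Below is a minimal, self-contained sketch of the post-commit behavior in the gradient-checkpointing branch of RobertaEncoder; the helper name resolve_use_cache and the bare flags are hypothetical stand-ins for the inline attributes and arguments, not part of the commit:

# Sketch only: after this commit, use_cache is still silently forced to False
# under gradient checkpointing during training; only the warning_once message
# is gone. resolve_use_cache is a hypothetical stand-in for the inline logic.
def resolve_use_cache(gradient_checkpointing: bool, training: bool, use_cache: bool) -> bool:
    if gradient_checkpointing and training:
        if use_cache:
            # previously: logger.warning_once("`use_cache=True` is incompatible with gradient checkpointing. ...")
            use_cache = False
    return use_cache

assert resolve_use_cache(True, True, True) is False   # downgraded with no warning emitted
assert resolve_use_cache(False, True, True) is True   # caching left alone when not checkpointing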