YYLY66
/

mRNABERT

@@ -8,7 +8,7 @@ class BertConfig(PretrainedConfig):
     def __init__(
         self,
-        alibi_starting_size: int = 512,
         attention_probs_dropout_prob: float = 0.0,
         **kwargs,
     ):
@@ -17,7 +17,7 @@ class BertConfig(PretrainedConfig):
         Args:
             alibi_starting_size (int): Use `alibi_starting_size` to determine how large of an alibi tensor to
                 create when initializing the model. You should be able to ignore this parameter in most cases.
-                Defaults to 512.
             attention_probs_dropout_prob (float): By default, turn off attention dropout in Mosaic BERT
                 (otherwise, Flash Attention will be off by default). Defaults to 0.0.
         """

     def __init__(
         self,
+        alibi_starting_size: int = 1024,
         attention_probs_dropout_prob: float = 0.0,
         **kwargs,
     ):
         Args:
             alibi_starting_size (int): Use `alibi_starting_size` to determine how large of an alibi tensor to
                 create when initializing the model. You should be able to ignore this parameter in most cases.
                 Defaults to 1024.
             attention_probs_dropout_prob (float): By default, turn off attention dropout in Mosaic BERT
                 (otherwise, Flash Attention will be off by default). Defaults to 0.0.
         """