YYLY66 committed on
Commit
88ca927
·
verified ·
1 Parent(s): 7343852

Upload configuration_bert.py

Browse files
Files changed (1) hide show
  1. configuration_bert.py +2 -2
configuration_bert.py CHANGED
@@ -8,7 +8,7 @@ class BertConfig(PretrainedConfig):
8
 
9
  def __init__(
10
  self,
11
- alibi_starting_size: int = 512,
12
  attention_probs_dropout_prob: float = 0.0,
13
  **kwargs,
14
  ):
@@ -17,7 +17,7 @@ class BertConfig(PretrainedConfig):
17
  Args:
18
  alibi_starting_size (int): Use `alibi_starting_size` to determine how large of an alibi tensor to
19
  create when initializing the model. You should be able to ignore this parameter in most cases.
20
- Defaults to 512.
21
  attention_probs_dropout_prob (float): By default, turn off attention dropout in Mosaic BERT
22
  (otherwise, Flash Attention will be off by default). Defaults to 0.0.
23
  """
 
8
 
9
  def __init__(
10
  self,
11
+ alibi_starting_size: int = 1024,
12
  attention_probs_dropout_prob: float = 0.0,
13
  **kwargs,
14
  ):
 
17
  Args:
18
  alibi_starting_size (int): Use `alibi_starting_size` to determine how large of an alibi tensor to
19
  create when initializing the model. You should be able to ignore this parameter in most cases.
20
+
21
  attention_probs_dropout_prob (float): By default, turn off attention dropout in Mosaic BERT
22
  (otherwise, Flash Attention will be off by default). Defaults to 0.0.
23
  """