Mishamq committed on
Commit
2dd0ea2
·
verified ·
1 Parent(s): 9e08f93

Upload configuration_hybridna.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. configuration_hybridna.py +2 -7
configuration_hybridna.py CHANGED
@@ -9,7 +9,8 @@ logger = logging.get_logger(__name__)
9
 
10
  class HybriDNAConfig(PretrainedConfig):
11
  r"""
12
- This is the configuration class to store the configuration of a [`HybriDNA`] model. It is adopted from the AI21 lab work of Jamba Model.
 
13
  Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
14
  documentation from [`PretrainedConfig`] for more information.
15
  Args:
@@ -94,8 +95,6 @@ class HybriDNAConfig(PretrainedConfig):
94
  Minimum clamping value of the `dt_proj.bias` layer initialization.
95
  time_step_limit (`tuple`, *optional*, defaults to `(0.0, inf)`):
96
  Accepted range of time step values.
97
- output_router_logits (`bool`, *optional*, defaults to `False`):
98
- Whether to return the router logits from mixture-of-experts layers.
99
  """
100
 
101
  model_type = "hybridna"
@@ -135,10 +134,8 @@ class HybriDNAConfig(PretrainedConfig):
135
  time_step_max=0.1,
136
  time_step_floor=1e-4,
137
  time_step_limit=(0.0, float("inf")),
138
- output_router_logits=False,
139
  **kwargs,
140
  ):
141
- self.output_router_logits = output_router_logits
142
  self.vocab_size = vocab_size
143
  self.tie_word_embeddings = tie_word_embeddings
144
  self.hidden_size = hidden_size
@@ -176,13 +173,11 @@ class HybriDNAConfig(PretrainedConfig):
176
  self.time_step_max = time_step_max
177
  self.time_step_floor = time_step_floor
178
 
179
-
180
  super().__init__(
181
  pad_token_id=pad_token_id,
182
  bos_token_id=bos_token_id,
183
  eos_token_id=eos_token_id,
184
  tie_word_embeddings=tie_word_embeddings,
185
- output_router_logits=output_router_logits,
186
  **kwargs,
187
  )
188
 
 
9
 
10
  class HybriDNAConfig(PretrainedConfig):
11
  r"""
12
+ This is the configuration class to store the configuration of a [`HybriDNA`] model.
13
+ HybriDNA is a hybrid Mamba-Attention model for DNA sequence modeling.
14
  Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
15
  documentation from [`PretrainedConfig`] for more information.
16
  Args:
 
95
  Minimum clamping value of the `dt_proj.bias` layer initialization.
96
  time_step_limit (`tuple`, *optional*, defaults to `(0.0, inf)`):
97
  Accepted range of time step values.
 
 
98
  """
99
 
100
  model_type = "hybridna"
 
134
  time_step_max=0.1,
135
  time_step_floor=1e-4,
136
  time_step_limit=(0.0, float("inf")),
 
137
  **kwargs,
138
  ):
 
139
  self.vocab_size = vocab_size
140
  self.tie_word_embeddings = tie_word_embeddings
141
  self.hidden_size = hidden_size
 
173
  self.time_step_max = time_step_max
174
  self.time_step_floor = time_step_floor
175
 
 
176
  super().__init__(
177
  pad_token_id=pad_token_id,
178
  bos_token_id=bos_token_id,
179
  eos_token_id=eos_token_id,
180
  tie_word_embeddings=tie_word_embeddings,
 
181
  **kwargs,
182
  )
183