Upload configuration_hybridna.py with huggingface_hub
Browse files
configuration_hybridna.py
CHANGED
|
@@ -9,7 +9,8 @@ logger = logging.get_logger(__name__)
|
|
| 9 |
|
| 10 |
class HybriDNAConfig(PretrainedConfig):
|
| 11 |
r"""
|
| 12 |
-
This is the configuration class to store the configuration of a [`HybriDNA`] model.
|
|
|
|
| 13 |
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
|
| 14 |
documentation from [`PretrainedConfig`] for more information.
|
| 15 |
Args:
|
|
@@ -94,8 +95,6 @@ class HybriDNAConfig(PretrainedConfig):
|
|
| 94 |
Minimum clamping value of the `dt_proj.bias` layer initialization.
|
| 95 |
time_step_limit (`tuple`, *optional*, defaults to `(0.0, float("inf"))`):
|
| 96 |
Accepted range of time step values.
|
| 97 |
-
output_router_logits (`bool`, *optional*, defaults to `False`):
|
| 98 |
-
Whether to return the router logits from mixture-of-experts layers.
|
| 99 |
"""
|
| 100 |
|
| 101 |
model_type = "hybridna"
|
|
@@ -135,10 +134,8 @@ class HybriDNAConfig(PretrainedConfig):
|
|
| 135 |
time_step_max=0.1,
|
| 136 |
time_step_floor=1e-4,
|
| 137 |
time_step_limit=(0.0, float("inf")),
|
| 138 |
-
output_router_logits=False,
|
| 139 |
**kwargs,
|
| 140 |
):
|
| 141 |
-
self.output_router_logits = output_router_logits
|
| 142 |
self.vocab_size = vocab_size
|
| 143 |
self.tie_word_embeddings = tie_word_embeddings
|
| 144 |
self.hidden_size = hidden_size
|
|
@@ -176,13 +173,11 @@ class HybriDNAConfig(PretrainedConfig):
|
|
| 176 |
self.time_step_max = time_step_max
|
| 177 |
self.time_step_floor = time_step_floor
|
| 178 |
|
| 179 |
-
|
| 180 |
super().__init__(
|
| 181 |
pad_token_id=pad_token_id,
|
| 182 |
bos_token_id=bos_token_id,
|
| 183 |
eos_token_id=eos_token_id,
|
| 184 |
tie_word_embeddings=tie_word_embeddings,
|
| 185 |
-
output_router_logits=output_router_logits,
|
| 186 |
**kwargs,
|
| 187 |
)
|
| 188 |
|
|
|
|
| 9 |
|
| 10 |
class HybriDNAConfig(PretrainedConfig):
|
| 11 |
r"""
|
| 12 |
+
This is the configuration class to store the configuration of a [`HybriDNA`] model.
|
| 13 |
+
HybriDNA is a hybrid Mamba-Attention model for DNA sequence modeling.
|
| 14 |
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
|
| 15 |
documentation from [`PretrainedConfig`] for more information.
|
| 16 |
Args:
|
|
|
|
| 95 |
Minimum clamping value of the `dt_proj.bias` layer initialization.
|
| 96 |
time_step_limit (`tuple`, *optional*, defaults to `(0.0, float("inf"))`):
|
| 97 |
Accepted range of time step values.
|
|
|
|
|
|
|
| 98 |
"""
|
| 99 |
|
| 100 |
model_type = "hybridna"
|
|
|
|
| 134 |
time_step_max=0.1,
|
| 135 |
time_step_floor=1e-4,
|
| 136 |
time_step_limit=(0.0, float("inf")),
|
|
|
|
| 137 |
**kwargs,
|
| 138 |
):
|
|
|
|
| 139 |
self.vocab_size = vocab_size
|
| 140 |
self.tie_word_embeddings = tie_word_embeddings
|
| 141 |
self.hidden_size = hidden_size
|
|
|
|
| 173 |
self.time_step_max = time_step_max
|
| 174 |
self.time_step_floor = time_step_floor
|
| 175 |
|
|
|
|
| 176 |
super().__init__(
|
| 177 |
pad_token_id=pad_token_id,
|
| 178 |
bos_token_id=bos_token_id,
|
| 179 |
eos_token_id=eos_token_id,
|
| 180 |
tie_word_embeddings=tie_word_embeddings,
|
|
|
|
| 181 |
**kwargs,
|
| 182 |
)
|
| 183 |
|