| """Caduceus config for Hugging Face. |
| |
| """ |
|
|
| from typing import Optional, Union |
|
|
| from transformers import PretrainedConfig |
|
|
|
|
| class CaduceusConfig(PretrainedConfig): |
| """Config that extends the original MambaConfig with params relevant to bi-directionality and RC equivariance.""" |
| model_type = "caduceus" |
|
|
| def __init__( |
| self, |
| |
| d_model: int = 2560, |
| n_layer: int = 64, |
| vocab_size: int = 50277, |
| ssm_cfg: Optional[dict] = None, |
| rms_norm: bool = True, |
| residual_in_fp32: bool = True, |
| fused_add_norm: bool = True, |
| pad_vocab_size_multiple: int = 8, |
| |
| |
| norm_epsilon: float = 1e-5, |
| |
| |
| initializer_cfg: Optional[dict] = None, |
| |
| |
| bidirectional: bool = True, |
| bidirectional_strategy: Union[str, None] = "add", |
| bidirectional_weight_tie: bool = True, |
| rcps: bool = False, |
| complement_map: Optional[dict] = None, |
| **kwargs, |
| ): |
| super().__init__(**kwargs) |
| self.d_model = d_model |
| self.n_layer = n_layer |
| self.vocab_size = vocab_size |
| self.ssm_cfg = ssm_cfg |
| self.rms_norm = rms_norm |
| self.residual_in_fp32 = residual_in_fp32 |
| self.fused_add_norm = fused_add_norm |
| self.pad_vocab_size_multiple = pad_vocab_size_multiple |
| self.norm_epsilon = norm_epsilon |
| self.initializer_cfg = initializer_cfg |
| self.bidirectional = bidirectional |
| self.bidirectional_strategy = bidirectional_strategy |
| self.bidirectional_weight_tie = bidirectional_weight_tie |
| self.rcps = rcps |
| self.complement_map = complement_map |
|
|