from transformers import PretrainedConfig


class AbLang2PairedConfig(PretrainedConfig):
    model_type = "ablang2-paired"

    def __init__(
        self,
        vocab_size=26,
        hidden_embed_size=480,
        n_attn_heads=20,
        n_encoder_blocks=12,
        padding_tkn=21,
        mask_tkn=23,
        layer_norm_eps=1e-12,
        a_fn="swiglu",
        dropout=0.0,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.vocab_size = vocab_size
        self.hidden_embed_size = hidden_embed_size
        self.hidden_size = hidden_embed_size  # alias under the standard Hugging Face attribute name
        self.n_attn_heads = n_attn_heads
        self.num_attention_heads = n_attn_heads  # alias under the standard Hugging Face attribute name
        self.n_encoder_blocks = n_encoder_blocks
        self.num_hidden_layers = n_encoder_blocks  # alias under the standard Hugging Face attribute name
        self.padding_tkn = padding_tkn
        self.mask_tkn = mask_tkn
        self.layer_norm_eps = layer_norm_eps
        self.a_fn = a_fn
        self.dropout = dropout
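With the config defined, a quick sanity check is to register the custom `model_type` with `AutoConfig`, serialize a default instance, and round-trip it from disk. This is a minimal sketch; the save directory `./ablang2-paired` is an arbitrary local path chosen for illustration, not a published checkpoint:

```python
from transformers import AutoConfig

# Register the custom model_type so AutoConfig can resolve it later.
AutoConfig.register("ablang2-paired", AbLang2PairedConfig)

# Instantiate with the defaults above and write a config.json to disk.
config = AbLang2PairedConfig()
config.save_pretrained("./ablang2-paired")

# Round-trip: AutoConfig reads model_type from config.json and
# returns an AbLang2PairedConfig instance.
reloaded = AutoConfig.from_pretrained("./ablang2-paired")
assert isinstance(reloaded, AbLang2PairedConfig)
assert reloaded.hidden_size == reloaded.hidden_embed_size == 480
```

The aliases set in `__init__` (`hidden_size`, `num_attention_heads`, `num_hidden_layers`) matter here: generic Hugging Face tooling looks for those standard attribute names rather than the AbLang2-specific ones.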