| from transformers import PretrainedConfig | |
class MBZTestConfig(PretrainedConfig):
    """Configuration object for the ``mbz-test`` model type.

    The constructor stores five hyperparameters on the instance under the
    same names as its arguments and forwards every other keyword argument
    to ``PretrainedConfig.__init__``.

    The default values satisfy ``d_model == n_heads * d_head``
    (4096 == 32 * 128), but that relation is not checked here: each value
    is stored exactly as given.

    Args:
        n_layers: Stored as ``self.n_layers`` (presumably the number of
            model layers -- confirm against the model code).
        d_model: Stored as ``self.d_model`` (presumably the hidden width).
        n_heads: Stored as ``self.n_heads`` (presumably the attention head
            count).
        n_vocab: Stored as ``self.n_vocab`` (presumably the vocabulary
            size).
        d_head: Stored as ``self.d_head`` (presumably the per-head width).
        **kwargs: Passed through unchanged to the base class constructor.
    """

    model_type = "mbz-test"

    def __init__(
        self,
        n_layers: int = 36,
        d_model: int = 4096,
        n_heads: int = 32,
        n_vocab: int = 50257,
        d_head: int = 128,
        **kwargs,
    ) -> None:
        # The model-specific fields are written before the base class
        # initialiser runs, in signature order.
        hyperparameters = {
            "n_layers": n_layers,
            "d_model": d_model,
            "n_heads": n_heads,
            "n_vocab": n_vocab,
            "d_head": d_head,
        }
        for field_name, field_value in hyperparameters.items():
            setattr(self, field_name, field_value)

        # Everything not consumed above is handled by the base class.
        super().__init__(**kwargs)