from transformers import PretrainedConfig
class DuchifatConfig(PretrainedConfig):
    """Configuration for the Duchifat v2 model.

    Stores the hyperparameters needed to instantiate the model. Any extra
    keyword arguments (e.g. ``pad_token_id``, ``bos_token_id``) are forwarded
    to :class:`~transformers.PretrainedConfig`.

    Args:
        vocab_size: Size of the token vocabulary. Defaults to 50257
            (matches the GPT-2 BPE tokenizer vocabulary size).
        hidden_size: Dimensionality of the hidden representations.
        num_layers: Number of transformer layers.
        nhead: Number of attention heads per layer.
            NOTE(review): hidden_size should normally be divisible by nhead
            (768 / 12 = 64 here) — the model code presumably relies on this;
            not validated in this class.
        max_seq: Maximum sequence length the model supports.
    """

    # Identifier used by transformers' AutoConfig registry / config files.
    model_type = "duchifat_v2"

    def __init__(
        self,
        vocab_size: int = 50257,
        hidden_size: int = 768,
        num_layers: int = 12,
        nhead: int = 12,
        max_seq: int = 1024,
        **kwargs,
    ):
        # Initialize the base config first so generic fields (token ids,
        # architectures, etc.) carried in **kwargs are set.
        super().__init__(**kwargs)
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.nhead = nhead
        self.max_seq = max_seq