from transformers.configuration_utils import PretrainedConfig


class TinyGPTConfig(PretrainedConfig):
    # Identifies this architecture in config.json and the Auto* registries.
    model_type = "tinygpt"

    def __init__(
        self,
        vocab_size=30522,
        n_layers=4,
        n_heads=4,
        d_model=256,
        d_ff=1024,
        max_seq_len=512,
        **kwargs,
    ):
        # Forward any remaining standard options (e.g. pad_token_id) to the base class.
        super().__init__(**kwargs)
        self.vocab_size = vocab_size
        self.n_layers = n_layers
        self.n_heads = n_heads
        self.d_model = d_model
        self.d_ff = d_ff
        self.max_seq_len = max_seq_len
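

# A minimal usage sketch, not part of the original snippet: it shows the JSON
# round-trip that PretrainedConfig provides out of the box. The directory name
# "tinygpt-config" is an arbitrary example.
config = TinyGPTConfig(n_layers=6, d_model=384)

config.save_pretrained("tinygpt-config")            # writes tinygpt-config/config.json
reloaded = TinyGPTConfig.from_pretrained("tinygpt-config")

assert reloaded.model_type == "tinygpt"
assert reloaded.n_layers == 6 and reloaded.d_model == 384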